Posts about software

Code for a Friend

November 3rd, 2008

And so she had a problem. Her script was
in a non-traditional format. And I just finished
spending 4 hours coding a solution that would
transform her format into screenplay style.
Luckily, from now on she can use Celtx, but I like
to think the practice I got with regular expressions
helped both me and her save some time in the future.

 
"""
Scriptizer
Chris B Stones
Nov 2, 2008 Time: 1.5 hours
Nov 3, 2008 Time: 2.5 hours
and counting...
 
TOTAL: 4 hours
 
This is my goodwill program that can parse a regular script
format into a more normalized version.
Since they're regularly formatted items it isn't hard to get
most of it working
 
process
1 Collect a sample of the text that encompasses the main elements
2 demo that in script format and export
3 Now break down what needs to be parsed
4 write run and rewrite the parser
5 once it mostly works run it on the whole script
6 Return the format to Aimee
 
Celebrate. 
 
The whole thing rests on the fact that the format is predictable.
And only certain parts need to be reformatted to fit.
"""
 
# Toggle which file we are testing with
name = "textonly.txt"
#name = "snip.txt"
# Read the whole script up front; the with-block closes the file handle
# as soon as the text is in memory instead of leaving that to the interpreter.
with open(name,'r') as script_file:
	raw_script = script_file.read()
 
# Parse on opening lines with CAPS
# should wind up with blocks of text we leave alone and
# portions we rewrite and replace 
 
import re
 
# Char Dialog "^[A-Z]* |^[A-Z]*:"
# One dialog chunk: a header line that starts with capitals and ends in a
# colon, plus the single line of text that follows it.
pat = re.compile("^[A-Z]* .*:\n.*|^[A-Z]*:\n.*",re.M)

# easy way to split and recombine
# Both calls go through the same compiled pattern so the text between the
# chunks (script_parts) and the chunks themselves (char_names) cannot drift apart.
script_parts = pat.split(raw_script)

char_names = pat.findall(raw_script)
 
# Wrap dialog text into indented lines (despite the name, nothing is right-justified)
def right_just(d, width=35, indent=10):
	"""Cut ``d`` into chunks of at most ``width`` characters, each indented.

	d      -- the text to wrap.
	width  -- maximum number of text characters per line (default 35).
	indent -- number of spaces put in front of every line (default 10).

	Returns a list of strings without trailing newlines. A full-width chunk
	that ends in a letter is treated as cutting a word in half: its last
	word is moved to the start of the next line. An empty ``d`` yields a
	single line that holds only the indent.

	Raises ValueError when ``width`` is smaller than 1.
	"""
	if width < 1:
		raise ValueError("width must be at least 1")

	pad = " " * indent
	bites = []
	chew = d
	while True:
		bite, chew = chew[:width], chew[width:]
		if len(bite) < width:
			# Short (possibly empty) remainder: this is the last line.
			bites.append(pad + bite)
			return bites
		if not bite[-1].isalpha() or " " not in bite:
			# Either the chunk does not end in a letter, or it is one unbroken
			# word with nothing to carry over. Pushing such a word back would
			# rebuild the same chunk forever, so it is hard-broken here.
			bites.append(pad + bite)
			continue
		# Keep everything up to the last space, retry the cut-off word next round.
		head, _, tail = bite.rpartition(" ")
		bites.append(pad + head + " ")
		chew = tail + chew
 
# process and return formatted
# Leading character name: a run of capitals ended by either a space or a colon.
_NAME_RE = re.compile("^[A-Z]* |^[A-Z]*:",re.M)


def process_str(s):
	"""Turn one "NAME ...:<newline>dialog" chunk into screenplay-style text.

	s -- a chunk made of a header line that ends in a colon, followed by
	     the dialog text.

	Returns a string with the name indented by 20 spaces, the rest of the
	header (the parenthetical, when the header contains "(") indented by
	15 spaces, and the dialog as wrapped by right_just(), one line each.

	Raises IndexError when s has no colon or does not start with a name.
	"""
	if ":" not in s:
		# Kept as IndexError: that is what indexing the split result raised before.
		raise IndexError("no ':' between header and dialog in %r" % (s,))

	# Cut at the colon that ends the header line. Splitting on every colon
	# threw away whatever followed a second one (e.g. a clock time in the dialog).
	separator = ":\n" if ":\n" in s else ":"
	before, _, after = s.partition(separator)
	name = _NAME_RE.findall(s)[0]

	parenthetical = None
	if "(" in before:
		# Whatever follows the name on the header line (first line only).
		parenthetical = _NAME_RE.split(before)[1].split("\n", 1)[0]

	# NOW we have the parts so format them correctly
	pieces = []
	if ":" in name:
		pieces.append(" "*20 + name.strip(":") + "\n")
	else:
		pieces.append(" "*20 + name)
		if parenthetical is None:
			# Without this newline the first dialog line was glued onto the name line.
			pieces.append("\n")
	if parenthetical is not None:
		pieces.append("\n" + " "*15 + parenthetical + "\n")
	for line in right_just(after.strip("\n")):
		pieces.append(line + "\n")

	return "".join(pieces)
 
#for s,part in zip(char_names,script_parts):
#	print part
#	print process_str(s)
 
# final output
# Interleave the untouched text with the reformatted dialog chunks.
output = []
for s,part in zip(char_names,script_parts):
	output.append(part)
	output.append(process_str(s))
# Splitting yields one more piece than there are dialog chunks, and zip() stops
# at the shorter list, so the text after the last chunk is added back here.
output.extend(script_parts[len(char_names):])

for l in output:
	# Parenthesised single-argument form runs under both Python 2 and Python 3.
	print(l)
str_out = "".join(output)

# The with-block closes the file even if the write fails.
with open("SCRIPT.txt",'w') as file_out:
	file_out.write(str_out)