Code for a Friend
November 3rd, 2008
And so she had a problem. Her script was
in a non-traditional format, and I just finished
spending 4 hours coding a solution that would
transform her format into screenplay style.
Luckily, from now on she can use Celtx, but I like
to think the practice I got with regular expressions
helped both her and me save some time in the future.
""" Scriptizer Chris B Stones Nov 2, 2008 Time: 1.5 hours Nov 3, 2008 Time: 2.5 hours and counting... TOTAL: 4 hours This is my good will program that can parse a regular script format into a more normlized version. Since they're regular formated items it isn't hard to get most of it working process 1 Collect a sample of the text that encopasse the main elements 2 demo thatin script format and export 3 Now break down what needs to be parsed 4 write run and rewrite the parser 5 once it mostly works run it on the whole script 6 Return the format to Aimee Celebrate. The whole thing rests on the fact that the format is predictiable. And only certain parts need to be fit. """ # Toggle which file we are testing with name = "textonly.txt" #name = "snip.txt" raw_script = open(name,'r').read() # Parse on opening lines with CAPS # should wind up with blocks of text we leave alone and # portions we rewrite and replace import re # Char Dialog "^[A-Z]* |^[A-Z]*:" pat = re.compile("^[A-Z]* .*:\n.*|^[A-Z]*:\n.*",re.M) # easy way to split and recombine script_parts = re.compile("^[A-Z]* .*:\n.*|^[A-Z]*:\n.*",re.M).split(raw_script) char_names = re.findall(pat,raw_script) # Right Justify text def right_just(d): bites = [] chew = d done_chewing = False #begin chewing up string while not done_chewing: bite = chew[:35] chew = chew[35:] if len(bite) < 35: #we are done bites.append(" "*10+bite) done_chewing = True else: if not bite[34].isalpha(): bites.append(" "*10+bite) else: # is alpha char words = bite.split(" ") # place every word except for the last one wline = words[:len(words)-1] line = "" for w in wline: line += w+" " bites.append(" "*10+line) chew = words[len(words)-1] + chew return bites # process and return formated def process_str(s): out = "" #output strings p = False # break up and return parts as formated string obtain_name = re.compile("^[A-Z]* |^[A-Z]*:",re.M) until_colon = re.compile(".*:",re.M) rest_of_str = re.compile(":\n.",re.M) # ? 
between_paras = re.compile("(.*)",re.M) # use only if they exist # split string into before and after : the_first_two = s.split(":") before = the_first_two[0] after = the_first_two[1] #before,after = s.split(":") # stupid clock line! name = re.findall(obtain_name,s)[0] if "(" in before: p = True before = obtain_name.split(before)[1] parathentical = re.findall(between_paras,before)[0] # NOW we have the parts so print format correctly if ":" in name: out += " "*20+name.strip(":")+"\n" else: out += " "*20+name if p: out += "\n"+" "*15+parathentical+"\n" for line in right_just(after.strip("\n")): out += line+"\n" return out #for s,part in zip(char_names,script_parts): # print part # print process_str(s) # final output output = [] for s,part in zip(char_names,script_parts): output.append(part) output.append(process_str(s)) str_out = "" for l in output: print l str_out += l file_out = open("SCRIPT.txt",'w') file_out.write(str_out) file_out.close()