001 package edu.upenn.cis.propbank_shen;
002
003 import java.io.*;
004 import java.util.LinkedList;
005 import java.util.List;
006 import java.util.Iterator;
007
008 import javax.xml.parsers.DocumentBuilder;
009 import javax.xml.parsers.DocumentBuilderFactory;
010 import javax.xml.parsers.FactoryConfigurationError;
011 import javax.xml.parsers.ParserConfigurationException;
012
013 import org.xml.sax.SAXException;
014 import org.w3c.dom.Document;
015 import org.w3c.dom.Node;
016
017
018 /**
019 This is an interface to a frameset as defined in the propbank
020 lexical guidelines.
021
022 A frameset is associated with a single verb, and contains
023 a list of "predicates", defined to be the verb itself plus
024 any phrasal variants which constitute a distinct meaning.
025
026 @author Scott Cotton
027 @see edu.upenn.cis.propbank_shen.Predicate
028 @see edu.upenn.cis.propbank_shen.RoleSet
029 @see edu.upenn.cis.propbank_shen.Example
030 @see edu.upenn.cis.propbank_shen.Role
031
032 */
033 public class FrameSet {
034
035 /** the root form of the verb */
036 protected String verb;
037 /** the xml document associated with the verb */
038 protected Document doc;
039 /** a list of the associated predicates for this verb
040 @see edu.upenn.cis.propbank_shen.Predicate */
041 protected List predicates;
042
043 /**
044 construct a FrameSet object from the root form of a verb
045
046 This constructor reads an xml file from disk and always returns
047 the same object from the same arguments.
048 */
049 public FrameSet(String v) throws CorruptDataException
050 {
051 predicates = new LinkedList();
052 verb = v;
053 DocumentBuilderFactory dbf = null;
054 DocumentBuilder db = null;
055 try {
056 dbf = DocumentBuilderFactory.newInstance();
057 db = dbf.newDocumentBuilder();
058 doc = db.parse(FrameSet.getPath(verb));
059 Node n = doc.getDocumentElement().getFirstChild();
060 while (n != null) {
061 if (n.getNodeName().equals("predicate")) {
062 predicates.add(new Predicate(n));
063 }
064 n = n.getNextSibling();
065 }
066 } catch (ParserConfigurationException pce) {
067 System.err.println("Parser config error: " + pce);
068 } catch (FactoryConfigurationError fce) {
069 System.err.println("Factory configuration error: " + fce);
070 } catch (SAXException se) {
071 throw new CorruptDataException("Bad frames file for "+v+".");
072 } catch (IOException ie) {
073 if (System.getProperty("TREEBANKDIR") == null ||
074 System.getProperty("FRAMEDIR") == null) {
075 throw new CorruptDataException
076 (ie.getMessage()+ "; Couldn't find "+v+
077 "; System properties TREEBANKDIR and "+
078 "FRAMEDIR must be set properly.");
079 }
080 // otherwise maybe a misspelled verb in propbank, like 'instal'
081 throw new CorruptDataException(ie.getMessage() +
082 "; Couldn't find "+v);
083 }
084 }
085 /**
086 Find the file for the lexical guidelines for a verb.
087 @param verb the verb whose file needs finding.
088 */
089 public static File getPath(String verb)
090 {
091 return new File(PBConfig.FrameDir() + File.separator + verb + ".xml");
092 }
093
094 /**
095 return a list of Predicate objects see
096 @see edu.upenn.cis.propbank_shen.Predicate
097 */
098 public List getPredicates()
099 {
100 return predicates;
101 }
102
103 /**
104 return the verb associated with this FrameSet
105 */
106 public String getVerb()
107 {
108 return verb;
109 }
110
111 /** a little unit test */
112 public static void main(String args[]) throws CorruptDataException
113 {
114 FrameSet fs = null;
115 if (args.length > 0) {
116 fs = new FrameSet(args[0]);
117 } else {
118 fs = new FrameSet("go");
119 }
120 List foo = fs.getPredicates();
121 Iterator i = foo.iterator();
122 while(i.hasNext()) {
123 Predicate p = (Predicate) i.next();
124 System.out.println(p.getLemma());
125 }
126 }
127 }
128