001 package edu.upenn.cis.propbank_shen; 002 003 import java.io.*; 004 import java.util.LinkedList; 005 import java.util.List; 006 import java.util.Iterator; 007 008 import javax.xml.parsers.DocumentBuilder; 009 import javax.xml.parsers.DocumentBuilderFactory; 010 import javax.xml.parsers.FactoryConfigurationError; 011 import javax.xml.parsers.ParserConfigurationException; 012 013 import org.xml.sax.SAXException; 014 import org.w3c.dom.Document; 015 import org.w3c.dom.Node; 016 017 018 /** 019 This is an interface to a frameset as defined in the propbank 020 lexical guidelines. 021 022 A frameset is associated with a single verb, and contains 023 a list of "predicates", defined to be the verb itself plus 024 any phrasal variants which constitute a distinct meaning. 025 026 @author Scott Cotton 027 @see edu.upenn.cis.propbank_shen.Predicate 028 @see edu.upenn.cis.propbank_shen.RoleSet 029 @see edu.upenn.cis.propbank_shen.Example 030 @see edu.upenn.cis.propbank_shen.Role 031 032 */ 033 public class FrameSet { 034 035 /** the root form of the verb */ 036 protected String verb; 037 /** the xml document associated with the verb */ 038 protected Document doc; 039 /** a list of the associated predicates for this verb 040 @see edu.upenn.cis.propbank_shen.Predicate */ 041 protected List predicates; 042 043 /** 044 construct a FrameSet object from the root form of a verb 045 046 This constructor reads an xml file from disk and always returns 047 the same object from the same arguments. 048 */ 049 public FrameSet(String v) throws CorruptDataException 050 { 051 predicates = new LinkedList(); 052 verb = v; 053 DocumentBuilderFactory dbf = null; 054 DocumentBuilder db = null; 055 try { 056 dbf = DocumentBuilderFactory.newInstance(); 057 db = dbf.newDocumentBuilder(); 058 doc = db.parse(FrameSet.getPath(verb)); 059 Node n = doc.getDocumentElement().getFirstChild(); 060 while (n != null) { 061 if (n.getNodeName().equals("predicate")) { 062 predicates.add(new Predicate(n)); 063 } 064 n = n.getNextSibling(); 065 } 066 } catch (ParserConfigurationException pce) { 067 System.err.println("Parser config error: " + pce); 068 } catch (FactoryConfigurationError fce) { 069 System.err.println("Factory configuration error: " + fce); 070 } catch (SAXException se) { 071 throw new CorruptDataException("Bad frames file for "+v+"."); 072 } catch (IOException ie) { 073 if (System.getProperty("TREEBANKDIR") == null || 074 System.getProperty("FRAMEDIR") == null) { 075 throw new CorruptDataException 076 (ie.getMessage()+ "; Couldn't find "+v+ 077 "; System properties TREEBANKDIR and "+ 078 "FRAMEDIR must be set properly."); 079 } 080 // otherwise maybe a misspelled verb in propbank, like 'instal' 081 throw new CorruptDataException(ie.getMessage() + 082 "; Couldn't find "+v); 083 } 084 } 085 /** 086 Find the file for the lexical guidelines for a verb. 087 @param verb the verb whose file needs finding. 088 */ 089 public static File getPath(String verb) 090 { 091 return new File(PBConfig.FrameDir() + File.separator + verb + ".xml"); 092 } 093 094 /** 095 return a list of Predicate objects see 096 @see edu.upenn.cis.propbank_shen.Predicate 097 */ 098 public List getPredicates() 099 { 100 return predicates; 101 } 102 103 /** 104 return the verb associated with this FrameSet 105 */ 106 public String getVerb() 107 { 108 return verb; 109 } 110 111 /** a little unit test */ 112 public static void main(String args[]) throws CorruptDataException 113 { 114 FrameSet fs = null; 115 if (args.length > 0) { 116 fs = new FrameSet(args[0]); 117 } else { 118 fs = new FrameSet("go"); 119 } 120 List foo = fs.getPredicates(); 121 Iterator i = foo.iterator(); 122 while(i.hasNext()) { 123 Predicate p = (Predicate) i.next(); 124 System.out.println(p.getLemma()); 125 } 126 } 127 } 128