001    package edu.upenn.cis.propbank_shen;
002    
003    import java.io.*;
004    import java.util.LinkedList;
005    import java.util.List;
006    import java.util.Iterator;
007    
008    import javax.xml.parsers.DocumentBuilder;
009    import javax.xml.parsers.DocumentBuilderFactory;
010    import javax.xml.parsers.FactoryConfigurationError;
011    import javax.xml.parsers.ParserConfigurationException;
012    
013    import org.xml.sax.SAXException;
014    import org.w3c.dom.Document;
015    import org.w3c.dom.Node;
016    
017    
018    /**
019       This is an interface to a frameset as defined in the propbank 
020       lexical guidelines.
021    
022       A frameset is associated with a single verb, and contains 
023       a list of "predicates", defined to be the verb itself plus
024       any phrasal variants which constitute a distinct meaning.
025    
026       @author Scott Cotton
027       @see edu.upenn.cis.propbank_shen.Predicate
028       @see edu.upenn.cis.propbank_shen.RoleSet
029       @see edu.upenn.cis.propbank_shen.Example
030       @see edu.upenn.cis.propbank_shen.Role
031    
032     */
033    public class FrameSet {
034    
035        /** the root form of the verb */
036        protected String verb;
037        /** the xml document associated with the verb */
038        protected Document doc;
039        /** a list of the associated predicates for this verb 
040            @see edu.upenn.cis.propbank_shen.Predicate */
041        protected List predicates;
042    
043        /**
044           construct a FrameSet object from the root form of a verb
045           
046           This constructor reads an xml file from disk and always returns
047           the same object from the same arguments.
048        */
049        public FrameSet(String v) throws CorruptDataException
050        {
051            predicates = new LinkedList();
052            verb = v;
053            DocumentBuilderFactory dbf = null;
054            DocumentBuilder db = null;
055            try {
056                dbf = DocumentBuilderFactory.newInstance();
057                db = dbf.newDocumentBuilder();
058                doc = db.parse(FrameSet.getPath(verb));
059                Node n = doc.getDocumentElement().getFirstChild();
060                while (n != null) {
061                    if (n.getNodeName().equals("predicate")) {
062                        predicates.add(new Predicate(n));
063                    }
064                    n = n.getNextSibling();
065                }
066            } catch (ParserConfigurationException pce) {
067                System.err.println("Parser config error: " + pce);
068            } catch (FactoryConfigurationError fce) {
069                System.err.println("Factory configuration error: " + fce);
070            } catch (SAXException se) {
071                throw new CorruptDataException("Bad frames file for "+v+".");
072            } catch (IOException ie) {
073                if (System.getProperty("TREEBANKDIR") == null ||
074                    System.getProperty("FRAMEDIR") == null) {
075                    throw new CorruptDataException
076                        (ie.getMessage()+ "; Couldn't find "+v+
077                         "; System properties TREEBANKDIR and "+
078                         "FRAMEDIR must be set properly.");
079                }
080                // otherwise maybe a misspelled verb in propbank, like 'instal'
081                throw new CorruptDataException(ie.getMessage() + 
082                                               "; Couldn't find "+v);
083            }
084        }
085        /**
086           Find the file for the lexical guidelines for a verb.
087           @param verb the verb whose file needs finding.
088        */
089        public static File getPath(String verb) 
090        {
091            return new File(PBConfig.FrameDir() + File.separator + verb + ".xml");
092        }
093    
094        /**
095           return a list of Predicate objects see
096           @see edu.upenn.cis.propbank_shen.Predicate
097         */
098        public List getPredicates() 
099        {
100            return predicates;
101        }
102        
103        /**
104           return the verb associated with this FrameSet 
105        */
106        public String getVerb() 
107        {
108            return verb;
109        }
110    
111        /** a little unit test */
112        public static void main(String args[]) throws CorruptDataException
113        {
114            FrameSet fs = null;
115            if (args.length > 0) {
116                fs = new FrameSet(args[0]);
117            } else {
118                fs = new FrameSet("go");
119            }
120            List foo = fs.getPredicates();
121            Iterator i = foo.iterator();
122            while(i.hasNext()) {
123                Predicate p = (Predicate) i.next();
124                System.out.println(p.getLemma());
125            }
126        }
127    }
128