问题
I was recommended to use one of the APIs (Jena, OpenRDF or Protege) to convert the outputs that I generated from OpenIE4.1 jar file (downloadable from http://knowitall.github.io/openie/). The following is the sample OpenIE4.1 output format: confidence score followed by subject, predicate, object triplet
The rail launchers are conceptually similar to the underslung SM-1
0.93 (The rail launchers; are; conceptually similar to the underslung SM-1)
I planned to produce triples that follow this pattern from the above output (in fact, hundreds of such outputs have been generated by processing a set of free-text documents; only those with a confidence score greater than a certain value will be processed):
Given
subject: The rail launchers
predicate: are
object: conceptually similar to the underslung SM-1
(confidence score is ignored)
- Create a blank node identifier for each distinct :subject in the file (let’s call it bnode_s)
- Create a blank node identifier for each distinct :object in the file (let’s call it bnode_o)
- Define a URI for each distinct predicate
BTW, I do have outputs that contain more than three elements, for instance John gave Mary a birthday gift (John; gave; Mary; a birthday gift), from which it is more complicated to produce an RDF triple.
However, I'm not familiar with any of the API mentioned above and don't know the input format that API may take.
回答1:
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.Property;
import com.hp.hpl.jena.rdf.model.Resource;
import com.hp.hpl.jena.util.URIref;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.vocabulary.RDFS;
import com.hp.hpl.jena.vocabulary.XSD;
/**
 * Example program: turns OpenIE extractions (confidence, subject, predicate, object)
 * into reified RDF statements with Jena and prints the resulting model in Turtle,
 * RDF/XML and N-Triples.
 */
public class OpenIETripletConversionExample {
    /** Namespace used for the default prefix, the predicate URIs and the confidence property. */
    private static final String NS = "http://stackoverflow.com/q/24897405/1281433/";

    /**
     * Builds the example model from two hard-coded extractions and writes it to standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Start from an empty model and register prefixes so that the RDF/XML and
        // Turtle serializations stay compact.
        Model model = ModelFactory.createDefaultModel();
        model.setNsPrefix( "", NS );
        model.setNsPrefix( "rdf", RDF.getURI() );
        model.setNsPrefix( "xsd", XSD.getURI() );
        model.setNsPrefix( "rdfs", RDFS.getURI() );

        // Property that keeps the confidence score on each statement node (optional).
        Property confidence = model.createProperty( NS + "confidence" );

        // Sample extractions, each laid out as { confidence, subject, predicate, object }.
        Object[][] triplets = {
            { 0.57, "The quick brown fox", "jumped", "over the lazy dog." },
            { 0.93, "The rail launchers", "are", "conceptually similar to the underslung SM-1." }
        };

        for ( int i = 0; i < triplets.length; i++ ) {
            describeExtraction( model, confidence, triplets[i] );
        }

        // Print the same model in each of the three syntaxes, one after another.
        Lang[] syntaxes = { Lang.TTL, Lang.RDFXML, Lang.NTRIPLES };
        for ( Lang syntax : syntaxes ) {
            RDFDataMgr.write( System.out, model, syntax );
        }
    }

    /**
     * Adds one extraction to the model: an anonymous resource standing for the statement,
     * linked through rdf:subject / rdf:predicate / rdf:object to an anonymous labelled
     * subject, a predicate URI in {@link #NS}, and an anonymous labelled object.
     *
     * @param model      model that receives the new resources and properties
     * @param confidence property under which the confidence score is stored
     * @param row        one extraction as { confidence, subject, predicate, object }
     */
    private static void describeExtraction( Model model, Property confidence, Object[] row ) {
        Resource statementNode = model.createResource();
        Resource subjectNode = labelledNode( model, (String) row[1] );
        // The predicate text is encoded before being appended to the namespace.
        String predicateUri = NS + URIref.encode( (String) row[2] );
        Property predicateNode = model.createProperty( predicateUri );
        Resource objectNode = labelledNode( model, (String) row[3] );

        statementNode
            .addLiteral( confidence, row[0] )
            .addProperty( RDF.subject, subjectNode )
            .addProperty( RDF.predicate, predicateNode )
            .addProperty( RDF.object, objectNode );
    }

    /**
     * Creates a fresh anonymous resource carrying the given text as its rdfs:label.
     *
     * @param model model in which the resource is created
     * @param text  label text
     * @return the newly created resource
     */
    private static Resource labelledNode( Model model, String text ) {
        Resource node = model.createResource();
        node.addProperty( RDFS.label, text );
        return node;
    }
}
@prefix : <http://stackoverflow.com/q/24897405/1281433/> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
[ rdf:object [ rdfs:label "conceptually similar to the underslung SM-1." ] ;
rdf:predicate :are ;
rdf:subject [ rdfs:label "The rail launchers" ] ;
:confidence "0.93"^^xsd:double
] .
[ rdf:object [ rdfs:label "over the lazy dog." ] ;
rdf:predicate :jumped ;
rdf:subject [ rdfs:label "The quick brown fox" ] ;
:confidence "0.57"^^xsd:double
] .
<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns="http://stackoverflow.com/q/24897405/1281433/"
xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<rdf:Description>
<rdf:object rdf:parseType="Resource">
<rdfs:label>conceptually similar to the underslung SM-1.</rdfs:label>
</rdf:object>
<rdf:predicate rdf:resource="http://stackoverflow.com/q/24897405/1281433/are"/>
<rdf:subject rdf:parseType="Resource">
<rdfs:label>The rail launchers</rdfs:label>
</rdf:subject>
<confidence rdf:datatype="http://www.w3.org/2001/XMLSchema#double"
>0.93</confidence>
</rdf:Description>
<rdf:Description>
<rdf:object rdf:parseType="Resource">
<rdfs:label>over the lazy dog.</rdfs:label>
</rdf:object>
<rdf:predicate rdf:resource="http://stackoverflow.com/q/24897405/1281433/jumped"/>
<rdf:subject rdf:parseType="Resource">
<rdfs:label>The quick brown fox</rdfs:label>
</rdf:subject>
<confidence rdf:datatype="http://www.w3.org/2001/XMLSchema#double"
>0.57</confidence>
</rdf:Description>
</rdf:RDF>
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffe <http://www.w3.org/2000/01/rdf-schema#label> "The quick brown fox" .
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffa .
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <http://stackoverflow.com/q/24897405/1281433/are> .
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://www.w3.org/1999/02/22-rdf-syntax-ns#subject> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffb .
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffc <http://stackoverflow.com/q/24897405/1281433/confidence> "0.93"^^<http://www.w3.org/2001/XMLSchema#double> .
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffa <http://www.w3.org/2000/01/rdf-schema#label> "conceptually similar to the underslung SM-1." .
_:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://www.w3.org/1999/02/22-rdf-syntax-ns#object> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffd .
_:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://www.w3.org/1999/02/22-rdf-syntax-ns#predicate> <http://stackoverflow.com/q/24897405/1281433/jumped> .
_:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://www.w3.org/1999/02/22-rdf-syntax-ns#subject> _:BX2D492663e1X3A1475ff7864cX3AX2D7ffe .
_:BX2D492663e1X3A1475ff7864cX3AX2D7fff <http://stackoverflow.com/q/24897405/1281433/confidence> "0.57"^^<http://www.w3.org/2001/XMLSchema#double> .
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffd <http://www.w3.org/2000/01/rdf-schema#label> "over the lazy dog." .
_:BX2D492663e1X3A1475ff7864cX3AX2D7ffb <http://www.w3.org/2000/01/rdf-schema#label> "The rail launchers" .
回答2:
For completeness (since the OP asked about several APIs) I am repeating @Joshua Taylor's solution but using the OpenRDF Sesame API instead of Jena:
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import org.openrdf.model.Model;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.ValueFactory;
import org.openrdf.model.impl.LinkedHashModel;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.openrdf.model.util.Literals;
import org.openrdf.model.vocabulary.RDF;
import org.openrdf.model.vocabulary.RDFS;
import org.openrdf.model.vocabulary.XMLSchema;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.Rio;
/**
 * Example program: turns OpenIE extractions (confidence, subject, predicate, object)
 * into reified RDF statements with the OpenRDF Sesame API and prints the resulting
 * model in Turtle, RDF/XML and N-Triples.
 */
public class OpenIETripletConversionExample {
    /** Namespace used for the default prefix, the predicate URIs and the confidence property. */
    private static final String NS = "http://stackoverflow.com/q/24897405/1281433/";

    /**
     * Builds the example model from two hard-coded extractions and writes it to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws UnsupportedEncodingException declared by the URL encoding of predicate text
     * @throws RDFHandlerException          declared by the Rio writers
     */
    public static void main(String[] args) throws UnsupportedEncodingException, RDFHandlerException {
        // Start from an empty model and register prefixes so that the RDF/XML and
        // Turtle serializations stay compact.
        Model model = new LinkedHashModel();
        model.setNamespace("rdf", RDF.NAMESPACE);
        model.setNamespace("rdfs", RDFS.NAMESPACE);
        model.setNamespace("xsd", XMLSchema.NAMESPACE);
        model.setNamespace("", NS);

        // Factory used for every URI, blank node and literal created below.
        ValueFactory vf = ValueFactoryImpl.getInstance();

        // Property that keeps the confidence score on each sentence node (optional).
        URI confidence = vf.createURI(NS, "confidence");

        // Sample extractions, each laid out as { confidence, subject, predicate, object }.
        Object[][] triplets = {
            { 0.57, "The quick brown fox", "jumped", "over the lazy dog." },
            { 0.93, "The rail launchers", "are", "conceptually similar to the underslung SM-1." }
        };

        for (int i = 0; i < triplets.length; i++) {
            describeExtraction(model, vf, confidence, triplets[i]);
        }

        // Print the same model in each of the three syntaxes, one after another.
        RDFFormat[] syntaxes = { RDFFormat.TURTLE, RDFFormat.RDFXML, RDFFormat.NTRIPLES };
        for (RDFFormat syntax : syntaxes) {
            Rio.write(model, System.out, syntax);
        }
    }

    /**
     * Adds one extraction to the model: a blank node standing for the sentence, linked
     * through rdf:subject / rdf:predicate / rdf:object to a labelled blank node for the
     * subject, a predicate URI in {@link #NS}, and a labelled blank node for the object.
     *
     * @param model      model that receives the new statements
     * @param vf         factory used to create the nodes and literals
     * @param confidence property under which the confidence score is stored
     * @param row        one extraction as { confidence, subject, predicate, object }
     * @throws UnsupportedEncodingException declared by {@link URLEncoder#encode(String, String)}
     */
    private static void describeExtraction(Model model, ValueFactory vf, URI confidence, Object[] row)
            throws UnsupportedEncodingException {
        Resource sentenceNode = vf.createBNode();
        Resource subjectNode = vf.createBNode();
        // The predicate text is URL-encoded before being used as the local name.
        String predicateName = URLEncoder.encode((String) row[2], "utf-8");
        URI predicateNode = vf.createURI(NS, predicateName);
        Resource objectNode = vf.createBNode();

        // Labels first, then the confidence score, then the three reification links.
        Object subjectText = row[1];
        String objectText = (String) row[3];
        model.add(subjectNode, RDFS.LABEL, Literals.createLiteral(vf, subjectText));
        model.add(objectNode, RDFS.LABEL, Literals.createLiteral(vf, objectText));
        model.add(sentenceNode, confidence, Literals.createLiteral(vf, row[0]));
        model.add(sentenceNode, RDF.SUBJECT, subjectNode);
        model.add(sentenceNode, RDF.PREDICATE, predicateNode);
        model.add(sentenceNode, RDF.OBJECT, objectNode);
    }
}
来源:https://stackoverflow.com/questions/24897405/use-rdf-api-jena-openrdf-or-protege-to-convert-openie-outputs