Source for file N3Parser.php

Documentation is available at N3Parser.php

  1. <?php
  2.  
  3. // ----------------------------------------------------------------------------------
  4. // Class: N3Parser
  5. // ----------------------------------------------------------------------------------
  6.  
  7.  
  8.  
  9.  
  10.  
  11. /**
  12. * PHP Notation3 Parser
  13. *
  14. * This parser can parse a subset of n3, reporting triples to a callback function
  15. * or constructing a RAP Model ( http://www.wiwiss.fu-berlin.de/suhl/bizer/rdfapi )
  16. *
  17. * Supported N3 features:
  18. * <ul>
  19. * <li>Standard things, repeated triples ( ; and , ), blank nodes using [ ], self-reference ('<>')</li>
  20. * <li>@prefix mappings</li>
  21. * <li>= maps to owl#sameAs</li>
  22. * <li>a maps to rdf-syntax-ns#type</li>
  23. * <li>Literal datatype and xml language tag support</li>
  24. * </ul>
  25. * Un-supported N3 Features include:
  26. * <ul>
  27. * <li>Reification using { }</li>
  28. * <li>. and ^ operators for tree traversal</li>
  29. * <li>Any log operators, like log:forAll etc.</li>
  30. * </ul>
  31. *
  32. * This parser is based on n3.py from Eep released 2nd March, 2002.
  33. * by Sean B. Palmer
  34. * ( http://infomesh.net/2002/eep/20020302-013802/n3.py )
  35. *
  36. * This parser is released under the GNU GPL license.
  37. * ( http://www.gnu.org/licenses/gpl.txt )
  38. *
  39. * <b>History:</b>
  40. * <ul>
  41. * <LI>04-05-2005 toke() function improved by Hannes Gassert hannes.gassert@deri.org </LI>
  42. * <LI>03-25-2005 N3 list processing added by Hannes Gassert hannes.gassert@deri.org</LI>
  43. * <LI>12-06-2004 improved namespace handling added (tobias.gauss@web.de)</LI>
  44. * <LI>08-10-2004 Function for converting strings to its unicode NFC form. Benjamin Nowack <bnowack@appmosphere.com></LI>
  45. * <LI>10-05-2004 Fixed bug with trailing space on qnames and space before ] parsing bug</LI>
  46. * <LI>11-27-2003 fixed problems with whitespaces at the end of bNodes</li>
  47. * <LI>11-18-2003 Changed xml:language regex for supporting lang-tags like en-uk.</li>
  48. * <li>11-07-2003 Added "setFixBnodes" function. Sets, if Bnodes should be renamed to the BNODE_PREFIX constant.</li>
  49. * <li>10-27-2003 fixed problems in generateModel(), changed regEx for Literals.</li>
  50. * <li>10-24-2003 Added support for Literals with rdf:DataType and xml:Language Tags. URI-Self-Reference with '<>' is supported.</li>
  51. * <li>08-01-2003 Made compatible with new v6 MemModel.</li>
  52. * <li>07-31-2003 Function generateModel() added.</li>
  53. * <li>07-16-2003 Fixed bug with anon nodes alone on a line.</li>
  54. * <li>06-08-2003 Initial version converted from n3.py.</li>
  55. * </ul>
  56. *
  57. *
  58. * @author Sean B. Palmer <sean@mysterylights.com>, Gunnar AA. Grimnes <ggrimnes@csd.abdn.ac.uk>, Daniel Westphal <mail@d-westphal.de>
  59. * @version V0.9.1
  60. * @package syntax
  61. * @access public
  62. ***/
  63.  
  64. class N3Parser extends Object {
  65.  
  66.  
  67. /* ==================== Variables ==================== */
  68.  
  // Tokenizer regular expression, assembled by the constructor and used by toke().
  var $Tokens;
  // Counter used by bnodeID() to number generated blank node ids.
  var $bNode;
  // Namespace URIs set by the constructor (used for the 'a' and '=' keywords).
  var $RDF_NS, $DAML_NS, $OWL_NS;
  // When truthy, n3tolist() dumps every intermediate token list.
  var $debug;
  // Set to true when an undeclared prefix is encountered.
  var $parseError;
  // Namespace URI => prefix (without trailing colon), filled by getPrefixes().
  var $parsedNamespaces = array();
  // N3 blank node label ('_:x') => generated id, filled by fixAnon().
  // Declared here because the constructor assigns it; it was an undeclared dynamic property before.
  var $bNodeMap = array();
  // Whether parsed blank node labels are replaced by generated ones (see setFixBnodes()).
  // Declared here because the constructor assigns it; it was an undeclared dynamic property before.
  var $FixBnodes;
  75.  
  76. /* ==================== Public Methods ==================== */
  77.  
  /**
   * Constructor.
   *
   * Builds the tokenizer regular expression from the individual token
   * patterns and resets the parser state (blank node counter and map,
   * debug flag, parse error flag, blank node renaming flag).
   *
   * @access public
   */
  function __construct() {
      //Regular expressions:
      $Name = '[A-Za-z0-9_@\.]+[^\.,;\[\] ]*';
      $URI = '<[^> ]*>';
      $bNode = '_:'.$Name;
      $Univar = '\?'.$Name;
      $QName = '(?:[A-Za-z][A-Za-z0-9_@\.]*)?:'.$Name;
      $Literal = '"(\\\"|[^"])*"'; # '"(?:\\"|[^"])*"'
      $LangTag = '@[A-Za-z\-]*[^ \^\.\;\,]';
      $Datatype = '(\^\^)[^ ,\.;)]+';
      $Datatype_URI = '(\^\^)'.$URI;
      $LLiteral = '"""[^"\\\\]*(?:(?:\\\\.|"(?!""))[^"\\\\]*)*"""';
      $Comment = '# .*$';
      $Prefix = '(?:[A-Za-z][A-Za-z0-9_]*)?:';
      $PrefixDecl = '@prefix';
      $WS = '[ \t]';
      $this->RDF_NS = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'; # for 'a' keyword
      $this->DAML_NS = 'http://www.daml.org/2001/03/daml+oil#'; # for '=' keyword
      $this->OWL_NS = 'http://www.w3.org/2002/07/owl#';

      // Order matters: alternatives are tried left to right by the regex engine.
      $t = array( $Datatype_URI,$Datatype,$LLiteral, $URI, $Literal, $PrefixDecl, $QName, $bNode, $Prefix, $Univar, 'a','=', '{', '}', '\(', '\)', '\[', '\]', ',', ';', '\.', $WS, $Comment,$LangTag);
      // Separator first: the reversed argument order of the former
      // join($t,"|") call is rejected by PHP 8.
      $this->Tokens="/(".implode("|",$t).")/m";

      $this->bNode=0;
      $this->debug=0;
      $this->bNodeMap = array();
      $this->FixBnodes = FIX_BLANKNODES;
      $this->parseError=false;
  }

  /**
   * PHP 4 style constructor, kept so that old code calling it keeps working.
   * It simply delegates to __construct(), which is the constructor that
   * current PHP versions invoke.
   *
   * @access public
   */
  function N3Parser() {
      $this->__construct();
  }
  116.  
  /**
   * Switches the renaming of blank node labels on or off.
   *
   * Only the boolean values true and false are accepted; any other
   * argument leaves the current setting untouched.
   *
   * @param boolean $set
   * @access public
   */
  function setFixBnodes($set) {
      if (!is_bool($set)) {
          return;
      }
      $this->FixBnodes = $set;
  }
  /**
   * Parses an N3 string and prints every triple as "(s, p, o)" on its own line.
   *
   * For statements longer than three fields, the language tag ('@...') and
   * datatype ('^^...') fields at positions 3 and 4 are appended to the
   * printed object.
   *
   * @param string $s
   * @access public
   */
  function parse($s) {
      foreach ($this->n3tolist($s) as $t) {
          $object = $t[2];
          if (count($t) > 3) {
              for ($i = 3; $i < 5; $i++) {
                  $extra = $t[$i];
                  // a field starts either with '@' or with '^^', never both
                  if ($extra[0] == '@' || substr($extra, 0, 2) == '^^') {
                      $object .= $extra;
                  }
              }
          }
          print '('.$t[0].', '.$t[1].', '.$object.")\n";
      }
  }
  151.  
  /**
   * Parses an N3 string and calls $func($subject, $predicate, $object)
   * once for every triple.
   *
   * For statements longer than three fields, the language tag ('@...') and
   * datatype ('^^...') fields at positions 3 and 4 are appended to the
   * object string handed to the callback.
   *
   * @param string $s
   * @param string $func name of the callback function
   * @access public
   */
  function uparse($s,$func) {
      $triples = $this->n3tolist($s);
      foreach ($triples as $t) {
          $object = $t[2];
          if (count($t) > 3) {
              foreach (array(3, 4) as $slot) {
                  $isLang = ($t[$slot][0] == '@');
                  $isType = (substr($t[$slot], 0, 2) == '^^');
                  if ($isLang || $isType) {
                      $object .= $t[$slot];
                  }
              }
          }
          $func($t[0], $t[1], $object);
      }
  }
  177.  
  178.  
  /**
   * Parses an N3 string and adds the resulting statements to a model.
   *
   * A new MemModel is created unless a model is passed in. The namespaces
   * collected while parsing are handed to the model afterwards.
   *
   * @param string $s
   * @param object Model $model optional model to add the statements to
   * @access public
   * @return object Model
   */
  function parse2model($s,$model = false) {
      $m = ($model == false) ? new MemModel() : $model;

      $triples = $this->n3tolist($s);
      foreach ($triples as $t) {
          // the whole statement is passed along so that literal objects can
          // pick up their language tag / datatype fields
          $subject = $this->toRDFNode($t[0], $t);
          $predicate = $this->toRDFNode($t[1], $t);
          $object = $this->toRDFNode($t[2], $t);
          $statement = new Statement($subject, $predicate, $object);
          $m->add($statement);
      }

      $m->addParsedNamespaces($this->parsedNamespaces);
      return $m;
  }
  209.  
  /**
   * Generates a model from the N3 document found at a path or URI.
   *
   * The whole document is read into memory and handed to parse2model().
   * The script is terminated if the document cannot be opened.
   *
   * @access public
   * @param string $path file name or URI readable by fopen()
   * @param mixed $dummy not used by this method
   * @param object Model $model optional model to add the statements to
   * @return object MemModel
   */
  function & generateModel($path,$dummy=false,$model=false) {

      $handle = fopen($path,'r') or die("N3 Parser: Could not open File: '$path' - Stopped parsing.");
      $input = "";
      do {
          $chunk = fread($handle, 512);
          // a failed read would otherwise never reach end-of-file
          if ($chunk === false) {
              break;
          }
          $input .= $chunk;
      } while (!feof($handle));

      fclose($handle);

      // A by-reference function has to return a variable, not a call
      // expression, otherwise PHP raises a notice.
      $result = $this->parse2model($input,$model);
      return $result;
  }
  233. /* ==================== Private Methods from here ==================== */
  234.  
  235. // General list processing functions
  236.  
  237.  
  238.  
  /**
   * Filter predicate for tokens: returns FALSE for tokens that are empty,
   * consist only of whitespace, or start with '#' (a comment), and TRUE
   * for every other token. Despite its name it therefore answers
   * "is this NOT whitespace?".
   *
   * @access private
   * @param string $s
   * @returns boolean
   */
  function isWS($s) {
      $isBlankOrComment = preg_match('/^(#.*|\s*)$/', $s);
      if ($isBlankOrComment) {
          return false;
      }
      return true;
  }
  247.  
  248.  
  249.  
  /**
   * Returns true if the string is not a comment line.
   *
   * A line counts as a comment when its first character after optional
   * leading spaces and tabs is '#'. The empty string is treated like a
   * comment (false is returned).
   *
   * Uses PCRE instead of the removed ereg() function; the character class
   * now really matches a tab (inside a POSIX bracket expression the former
   * '\t' stood for a backslash or the letter t).
   *
   * @access private
   * @param string $s
   * @returns boolean
   */
  function notComment($s) {
      if ($s=="") return false;
      if (preg_match('/^[ \t]*#/', $s)) return false;
      return true;
  }
  262.  
  /**
   * Removes all whitespace and comment tokens from a token list.
   *
   * Tokens are kept when isWS() returns true for them. The keys of the
   * surviving tokens are left unchanged, so the result may have gaps in
   * its numeric keys.
   *
   * @access private
   * @param array $list
   * @returns array
   */
  function filterWs($list) {
      $kept = array();
      foreach ($list as $key => $token) {
          if ($this->isWS($token)) {
              $kept[$key] = $token;
          }
      }
      return $kept;
  }
  /**
   * Escapes a string the way N-Triples strings are written
   * (see http://www.w3.org/TR/rdf-testcases/#ntrip_strings): control
   * characters and everything above #x7E become \uHHHH or \UHHHHHHHH,
   * tab / line feed / carriage return become \t, \n and \r, and a double
   * quote becomes \".
   *
   * Input that is valid UTF-8 is decoded character by character (1 to 4
   * bytes each). Any other input is treated as single-byte text where the
   * byte value is the code point. Note that, despite the method name, no
   * Unicode normalization is carried out.
   *
   * @param String $str
   * @return String
   * @access private
   */
  function str2unicode_nfc($str=""){
      $result="";
      $str=(string)$str;
      $chars=array();
      /* try to detect encoding: the /u modifier only matches valid UTF-8 */
      if($str!=="" && preg_match_all('/./su',$str,$m) && implode('',$m[0])===$str){
          $chars=$m[0];
      }
      else{
          /* not UTF-8: every byte is one character */
          for($i=0,$i_max=strlen($str);$i<$i_max;$i++){
              $chars[]=$str[$i];
          }
      }
      foreach($chars as $char){
          $nr=0;/* unicode dec nr */
          $bytes=strlen($char);
          if($bytes==1){
              /* 0####### (0-127), or a raw single byte in the fallback case */
              $nr=ord($char);
          }
          elseif($bytes==2){
              /* 110##### 10###### = 192+x 128+x */
              $nr=((ord($char[0])-192)*64) + (ord($char[1])-128);
          }
          elseif($bytes==3){
              /* 1110#### 10###### 10###### = 224+x 128+x 128+x */
              $nr=((ord($char[0])-224)*4096) + ((ord($char[1])-128)*64) + (ord($char[2])-128);
          }
          elseif($bytes==4){
              /* 1111#### 10###### 10###### 10###### = 240+x 128+x 128+x 128+x */
              $nr=((ord($char[0])-240)*262144) + ((ord($char[1])-128)*4096) + ((ord($char[2])-128)*64) + (ord($char[3])-128);
          }
          /* result (see http://www.w3.org/TR/rdf-testcases/#ntrip_strings) */
          if($nr<9){/* #x0-#x8 (0-8) */
              $result.="\\u".sprintf("%04X",$nr);
          }
          elseif($nr==9){/* #x9 (9) */
              $result.='\t';
          }
          elseif($nr==10){/* #xA (10) */
              $result.='\n';
          }
          elseif($nr<13){/* #xB-#xC (11-12) */
              $result.="\\u".sprintf("%04X",$nr);
          }
          elseif($nr==13){/* #xD (13): carriage return (was wrongly written as \t) */
              $result.='\r';
          }
          elseif($nr<32){/* #xE-#x1F (14-31) */
              $result.="\\u".sprintf("%04X",$nr);
          }
          elseif($nr<34){/* #x20-#x21 (32-33) */
              $result.=$char;
          }
          elseif($nr==34){/* #x22 (34) */
              $result.='\"';
          }
          elseif($nr<92){/* #x23-#x5B (35-91) */
              $result.=$char;
          }
          elseif($nr==92){/* #x5C (92) */
              /* NOTE(review): this emits a single backslash, i.e. the character
                 is passed through unescaped. Callers hand in literal text that
                 may already contain escape sequences, so this is left as is -
                 confirm whether a doubled backslash is wanted here. */
              $result.='\\';
          }
          elseif($nr<127){/* #x5D-#x7E (93-126) */
              $result.=$char;
          }
          elseif($nr<65536){/* #x7F-#xFFFF (127-65535) */
              $result.="\\u".sprintf("%04X",$nr);
          }
          elseif($nr<1114112){/* #x10000-#x10FFFF (65536-1114111) */
              $result.="\\U".sprintf("%08X",$nr);
          }
          else{
              /* other chars are not defined => ignore */
          }
      }
      return $result;
  }
  358.  
  359.  
  /**
   * Cuts a slice out of an array.
   *
   * Returns a two element array: first the elements from offset $start up
   * to (not including) offset $end, second the input without that slice.
   * e.g. getSpan(['p', 'q', 'r'], 1, 2) gives (['q'], ['p', 'r'])
   *
   * @return array
   * @access private
   * @param array $list
   * @param integer $start
   * @param integer $end
   */
  function getSpan($list, $start, $end) {
      $span = array_slice($list, $start, $end - $start);

      // everything before the slice, followed by everything after it
      $remainder = array_slice($list, 0, $start);
      $tail = array_slice($list, $end);
      array_splice($remainder, count($remainder), 0, $tail);

      return array($span, $remainder);
  }
  376.  
  377.  
  /**
   * Appends the elements of $b to the end of $a and returns the result.
   * Neither argument is modified for the caller.
   *
   * @param array $a
   * @param array $b
   * @returns array
   * @access private
   */
  function array_concat($a, $b) {
      $insertAt = count($a);
      // zero elements removed: pure insertion at the end of $a
      array_splice($a, $insertAt, 0, $b);
      return $a;
  }
  389.  
  /**
   * Returns the offsets at which $item occurs in $list (compared with ===),
   * followed by one extra entry holding the number of elements in $list.
   * The extra entry serves as an end marker for callers that slice between
   * consecutive positions.
   *
   * @param array $list
   * @param string $item
   * @returns array
   * @access private
   */
  function posns($list, $item) {
      $values = array_values($list);
      $positions = array_keys($values, $item, true);
      $positions[] = count($values);
      return $positions;
  }
  407.  
  408.  
  409. /* More N3 specific functions */
  410.  
  /**
   * Notation3 tokenizer: takes a string and returns the raw token list.
   *
   * Line endings are normalised to "\n" first, then every match of the
   * token regular expression is collected and trimmed. Whitespace and
   * comment tokens are still part of the result. The script is terminated
   * when the input is empty.
   *
   * @param string $s
   * @returns array
   * @access private
   */
  function toke($s) {
      if (!strlen($s)) die('Document has no content!');

      // "\r\n" is handled before the lone "\r"
      $s = str_replace(array("\r\n", "\r"), "\n", $s);

      preg_match_all($this->Tokens, $s, $matches);

      return array_map('trim', $matches[0]);
  }
  /**
   * Re-forms a list so that the elements from offset $start up to (not
   * including) offset $end become one nested array element.
   * e.g. listify(["a","b","c","d"],1,3) => ["a",["b","c"],"d"]
   *
   * @param array $list
   * @param integer $start
   * @param integer $end
   * @returns array
   * @access private
   */
  function listify($list, $start, $end) {
      $group = array_slice($list, $start, $end - $start);

      $result = array_slice($list, 0, $start);
      $result[] = $group;

      $tail = array_slice($list, $end);
      array_splice($result, count($result), 0, $tail);

      return $result;
  }
  468.  
  /**
   * Extracts the @prefix declarations from a token list.
   *
   * Every declaration is expected to span four tokens ('@prefix', the
   * prefix including its colon, the namespace URI in angle brackets and
   * one closing token). Declarations are removed wherever they occur in
   * the list; formerly only declarations at the very start of the list
   * were handled correctly because the position was taken from
   * current($list), which yields an element value rather than an offset.
   *
   * The mapping namespace URI => prefix (without colon) is also recorded
   * in $this->parsedNamespaces.
   *
   * @param array $list
   * @access private
   * @returns array two elements: the prefix => namespace map and the
   *                re-indexed token list without the declarations
   */
  function getPrefixes($list) {

      $prefixes = array();
      // array_search() returns keys, so keys and offsets must coincide
      $list = array_values($list);

      while (($pos = array_search('@prefix', $list, true)) !== false) {
          // remove the declaration tokens from the list
          $binding = array_splice($list, $pos, 4);
          if (!isset($binding[2])) {
              $this->parseError = true;
              trigger_error('Incomplete @prefix declaration', E_USER_WARNING);
              break;
          }
          $ns = $binding[1];
          $uri = substr($binding[2], 1, -1); // strip '<' and '>'
          $prefixes[$ns] = $uri;
          $this->parsedNamespaces[$uri] = substr($ns, 0, -1); // strip ':'
      }

      return array($prefixes, $list);
  }
  495.  
  /**
   * array_walk() callback: replaces the token 'a' in place with the
   * bracketed rdf:type URI built from $this->RDF_NS.
   *
   * @param string $l token, modified by reference
   * @param mixed $p array key supplied by array_walk() (not used)
   * @access private
   */
  function replace_a_type(&$l,$p) {
      if ($l != 'a') {
          return;
      }
      $l = '<'.$this->RDF_NS.'type>';
  }
  504.  
  /**
   * array_walk() callback: replaces the token '=' in place with the
   * bracketed sameAs URI built from $this->OWL_NS.
   *
   * @param string $l token, modified by reference
   * @param mixed $p array key supplied by array_walk() (not used)
   * @access private
   */
  function replace_equal(&$l,$p) {
      if ($l != '=') {
          return;
      }
      $l = '<'.$this->OWL_NS.'sameAs>';
  }
  513.  
  /**
   * array_walk() callback: replaces the token 'this' in place with
   * '<urn:urn-n:this>'.
   *
   * The token is taken by reference, like in the other replace_*
   * callbacks; without the reference the replacement had no effect.
   *
   * @param string $l token, modified by reference
   * @param mixed $p array key supplied by array_walk() (not used)
   * @access private
   */
  function replace_this(&$l,$p) {
      if ($l=='this') $l='<urn:urn-n:this>';
  }
  522.  
  /**
   * Applies keyword replacement, prefix expansion and literal clean-up to
   * a flat token list.
   *
   * - 'a', '=' and 'this' are replaced through the replace_* callbacks.
   * - '<>' becomes a generated URI built from the server address, the
   *   request URI and the current timestamp.
   * - qnames are expanded with the declared prefixes; an undeclared prefix
   *   sets $this->parseError, raises an E_USER_ERROR and stops processing.
   * - long literals ("""...""") are turned into ordinary quoted literals,
   *   with unescaped double quotes inside them escaped.
   * - datatype tokens ('^^...') are normalised to the '^^<uri>' form.
   *
   * @param array $prefixes prefix (with colon) => namespace URI
   * @param array $list
   * @returns array the processed list
   * @access private
   */
  function applyStuff($prefixes, $list) {

      array_walk($list, array($this, 'replace_a_type'));
      array_walk($list, array($this, 'replace_equal'));
      array_walk($list, array($this, 'replace_this'));

      for ($i=0;$i<count($list);$i++) {

          // a <> resource is changed to a local URI + timestamp (no path
          // of the parsed document is available inside this method)
          if ($list[$i]=='<>') {
              if (!isset($_SERVER['SERVER_ADDR'])) $_SERVER['SERVER_ADDR']='localhost';
              if (!isset($_SERVER['REQUEST_URI'])) $_SERVER['REQUEST_URI']='/rdfapi-php';
              $list[$i]='<http://'.$_SERVER['SERVER_ADDR'].$_SERVER['REQUEST_URI'].'#generate_timestamp_'.time().'>';
          }

          $first = $list[$i][0];

          if (strpos('<_"?.;,{}[]()@', $first) === false && substr($list[$i],0,3) != '^^<') {
              // qname (or '^^qname'): split at the first colon only so that
              // colons inside the local name are kept
              $_r = explode(':', $list[$i], 2);
              $ns = $_r[0].':';
              $name = isset($_r[1]) ? $_r[1] : '';

              if (isset($prefixes[$ns])) {
                  $list[$i] = '<'.$prefixes[$ns].$name.'>';
              } else if (isset($prefixes[substr($ns,2)])) {
                  // datatype given as qname: the leading '^^' is not part of the prefix
                  $list[$i] = '^^'.$prefixes[substr($ns,2)].$name.'';
              } else {
                  $this->parseError=true;
                  trigger_error('Prefix not declared: '.$ns, E_USER_ERROR);
                  break;
              }
          } else if ($first == '"') { // Congratulations - it's a literal!
              if (substr($list[$i],0,3) == '"""') {
                  if (substr($list[$i],-3,3) == '"""') { // A big literal...
                      $lit = substr($list[$i],3,-3);
                      // NOTE(review): line breaks inside the literal are kept as
                      // they are; the former str_replace('\n', '\\n', ...) call
                      // replaced a string by itself and has been dropped.

                      // Escape double quotes that are not escaped yet. The
                      // look-behind keeps the character in front of the quote,
                      // which the former ereg_replace() pattern swallowed.
                      $lit = preg_replace('/(?<!\\\\)"/', '\\\\"', $lit);

                      $list[$i] = '"'.$lit.'"';
                  }
                  else { die ('Incorrect string formatting: '.substr($list[$i],-3,3)); }
              } else {
                  // '.' instead of '+': the message has to be concatenated
                  if (strstr($list[$i],"\n")) die('Newline in literal: '.$list[$i]);
              }
          }

          if (substr($list[$i],0,2)=='^^') {
              if (substr($list[$i],2,1)!='<') {
                  $list[$i]='^^<'.substr($list[$i],2).'>';
              }
          }
      }

      return $list;
  }
  597.  
  /**
   * Splits a flat token list into statements at every '.' token.
   *
   * Each statement is returned as an array of its tokens without the
   * closing '.'. Tokens after the last '.' are dropped.
   *
   * @param array $list
   * @returns array
   * @access private
   */
  function getStatements($list) {
      $statements = array();
      $current = array();

      foreach ($list as $token) {
          if ($token == '.') {
              $statements[] = $current;
              $current = array();
          } else {
              $current[] = $token;
          }
      }

      return $statements;
  }
  /**
   * Splits a statement at its ';' tokens.
   * e.g. :Gunnar :firstname "Gunnar" ; :lastname "Grimnes" .
   *
   * Returns a two element array: first the tokens in front of the first
   * ';', second a list with one token array per ';' holding the tokens
   * that follow it (up to the next ';' or the end of the statement).
   *
   * @param array $list
   * @returns array
   * @access private
   */
  function getPovs($list) {
      $head = array();
      $povs = array();
      $seenSemicolon = false;

      foreach ($list as $token) {
          if ($token === ';') {
              // every ';' opens a new (possibly empty) group
              $povs[] = array();
              $seenSemicolon = true;
          } elseif ($seenSemicolon) {
              $povs[count($povs) - 1][] = $token;
          } else {
              $head[] = $token;
          }
      }

      return array($head, $povs);
  }
  647.  
  /**
   * Splits off the additional objects of a predicate at the ',' tokens.
   * e.g. :Gunnar :likes "Cheese", "Wine".
   *
   * Returns a two element array: first the list without the split-off
   * parts, second one four-field entry per ',': the ',' itself, the
   * object, and up to two following fields that start with '@' or '^'
   * (language tag / datatype). Missing fields are filled with ' '.
   *
   * @access private
   * @param array $list
   * @returns array
   */
  function getObjs($list) {
      $objs = array();

      while (in_array(",", $list)) {
          $pos = array_search(",", $list);

          // ',' and the object, plus any language tag / datatype fields
          $width = 2;
          if (isset($list[$pos+2])) {
              $lead = substr($list[$pos+2], 0, 1);
              if ($lead == '@') $width++;
              if ($lead == '^') $width++;
          }
          if (isset($list[$pos+3]) && substr($list[$pos+3], 0, 1) == '^') {
              $width++;
          }

          // cut the fields out of the list
          $obj = array_splice($list, $pos, $width);
          if (!isset($obj[2])) $obj[2] = ' ';
          if (!isset($obj[3])) $obj[3] = ' ';

          $objs[] = $obj;
      }

      return array($list, $objs);
  }
  686.  
  /**
   * Does the real work: turns the tokens of one statement into a list of
   * triples.
   *
   * A statement of exactly three tokens is returned as is. Longer
   * statements are split at ';' (further predicates for the same subject)
   * and ',' (further objects for the same predicate); each resulting
   * triple has five fields: subject, predicate, object and two fields for
   * a language tag / datatype (' ' when absent).
   *
   * A statement consisting of a single generated blank node id is ignored.
   * Statements with two tokens or fewer terminate the script.
   *
   * @param array $list
   * @returns array
   * @access private
   */
  function statementize($list) {

      if (count($list) == 1 && preg_match("/_".preg_quote(BNODE_PREFIX, '/')."[0-9]+_/",$list[0])) {
          if ($this->debug) print "Ignored bNode exists statement. ".implode(' ', $list)."\n";
          return array();
      }

      if (count($list) == 3) return array($list);
      if (count($list) < 3) die("Error: statement too short!");

      //Get all ;
      $r=$this->getPovs($list);
      $spo=$r[0];
      $po=$r[1];
      $all=array();

      $subject = $spo[0];
      foreach ($po as $pop) {
          // a ';' directly in front of the end of the statement (or of
          // another ';') yields an empty group that carries no triple
          if (count($pop) == 0) continue;

          $r=$this->getObjs($pop);
          $myPo=$r[0];
          $obj=$r[1];

          if (!isset($myPo[2])) $myPo[2]=' ';
          if (!isset($myPo[3])) $myPo[3]=' ';
          $predicate = $myPo[0];
          $all[]=array($subject,$predicate,$myPo[1],$myPo[2],$myPo[3]);

          // entries from getObjs() start with the ',' token, hence offset 1
          foreach ($obj as $o) $all[]=array($subject,$predicate, $o[1],$o[2],$o[3]);
      }

      // the part in front of the first ';' : subject predicate object(s)
      $r = $this->getObjs($spo);
      $spo=$r[0];
      $objs=$r[1];

      $subject=$spo[0];
      $predicate=$spo[1];

      if(!isset($spo[3])) $spo[3]=' ';
      if(!isset($spo[4])) $spo[4]=' ';
      $all[]=array($subject, $predicate, $spo[2],$spo[3],$spo[4]);

      foreach ($objs as $obj) $all[]=array($subject, $predicate, $obj[1],$obj[2],$obj[3]);

      return $all;
  }
  752.  
  /**
   * Turns every bracketed group in a token list into one nested array
   * element. The bracket tokens themselves stay inside the nested array.
   * e.g. doLists(["a","b","[","c","]","d"], "[","]")=> ["a","b", ["[","c","]"], "d"]
   *
   * Groups are wrapped from the deepest nesting level outwards, one group
   * per pass, until no opening token is left on the top level.
   * Unbalanced brackets terminate the script; formerly they led to
   * undefined index accesses and a corrupted token list.
   *
   * @param array $list
   * @param string $schar opening token
   * @param string $echar closing token
   * @returns array
   * @access private
   */
  function doLists($list, $schar, $echar) {

      while (in_array($schar, $list)) {
          // nesting level => list of groups, each group being
          // array(offset of opening token, offset of closing token)
          $ndict=array();
          $nestingLevel=0;
          $count=count($list);

          for ($i=0;$i<$count;$i++) {
              if ($list[$i] == $schar) {
                  $nestingLevel += 1;
                  $ndict[$nestingLevel][]=array($i);
              }
              if ($list[$i] == $echar) {
                  if ($nestingLevel < 1) {
                      die("Error: unbalanced '".$echar."' in N3 input!");
                  }
                  // the closing token belongs to the group opened last on this level
                  $ndict[$nestingLevel][count($ndict[$nestingLevel])-1][]=$i;
                  $nestingLevel -= 1;
              }
          }
          if ($nestingLevel != 0) {
              die("Error: unbalanced '".$schar."' in N3 input!");
          }

          // wrap the first group of the deepest level
          $biggest = max(array_keys($ndict));
          $tol = $ndict[$biggest][0];
          $length = $tol[1] - $tol[0] + 1;
          $group = array_slice($list, $tol[0], $length);
          array_splice($list, $tol[0], $length, array($group));
      }
      return $list;
  }
  797.  
  /**
   * Applies doLists() for all bracket types, in the order [ ], { }, ( ).
   *
   * @param array $list
   * @returns array
   * @access private
   */
  function listStuff($list) {
      $brackets = array(
          array('[', ']'),
          array('{', '}'),
          array('(', ')')
      );
      foreach ($brackets as $pair) {
          $list = $this->doLists($list, $pair[0], $pair[1]);
      }
      return $list;
  }
  811.  
  /**
   * Generates a new node id of the form "_" . BNODE_PREFIX . counter . "_".
   * The counter in $this->bNode is increased with every call.
   *
   * @access private
   * @returns string
   */
  function bnodeID() {
      $next = ++$this->bNode;
      return "_".BNODE_PREFIX.$next."_";
  }
  821.  
  /**
   * Replaces blank node labels like _:a by generated node ids.
   *
   * The same label always maps to the same id; the mapping is kept in
   * $this->bNodeMap.
   *
   * @access private
   * @param array $list
   * @returns array
   */
  function fixAnon($list) {
      foreach ($list as $i => $token) {
          if (substr($token, 0, 2) != "_:") {
              continue;
          }
          if (!isset($this->bNodeMap[$token])) {
              $this->bNodeMap[$token] = $this->bnodeID();
          }
          $list[$i] = $this->bNodeMap[$token];
      }
      return $list;
  }
  842.  
  /**
   * Replaces the nested arrays produced by listStuff() by blank nodes and
   * appends the statements describing them to the end of the list.
   *
   * - [ ... ] becomes a new blank node; "bnode ... ." is appended.
   * - ( ... ) becomes an RDF collection: a chain of blank nodes linked by
   *   rdf:first / rdf:rest and closed with rdf:nil. An empty collection is
   *   replaced by rdf:nil itself (formerly it became a blank node without
   *   any statement).
   *
   * Appended elements that are nested arrays themselves are handled later
   * in the same loop, because the list length is re-read on every pass.
   * Any other kind of nested array terminates the script.
   *
   * @access private
   * @param array $list
   * @return array
   */
  function expandLists($list) {

      for($i=0;$i<count($list);$i++) {
          if (!is_array($list[$i])) continue;

          $open = isset($list[$i][0]) ? $list[$i][0] : null;

          if ($open=='[') {
              $bnode=$this->bnodeID();
              $prop=$list[$i];
              $list[$i]=$bnode;
              // new statement: bnode followed by the content between the brackets
              $list[]=$bnode;
              $list=$this->array_concat($list, array_slice($prop,1,-1));
              $list[]='.';
          }elseif($open=='(') {

              $rdfNil = '<'. RDF_NAMESPACE_URI . RDF_NIL .'>';
              $rdfFirst = '<'. RDF_NAMESPACE_URI . RDF_FIRST .'>';
              $rdfRest = '<'. RDF_NAMESPACE_URI . RDF_REST .'>';

              // local copy of list without "(" and ")"
              $t_list = array_slice($list[$i], 1, -1);
              $count = count($t_list);

              // the empty collection is rdf:nil
              if ($count == 0) {
                  $list[$i] = $rdfNil;
                  continue;
              }

              //prepare bnodes
              $fromBnode = $this->bnodeID();
              $toBnode = $this->bnodeID();

              //link first bnode into graph
              $list[$i] = $fromBnode;

              //loop through list, convert to RDF linked list
              for ($idx = 0; $idx < $count; $idx++){

                  // set rdf:first
                  $list[] = $fromBnode;
                  $list[] = $rdfFirst;
                  $list[] = $t_list[$idx];
                  $list[] = '.';

                  // set rdf:rest (nil or next bnode)
                  $list[] = $fromBnode;
                  $list[] = $rdfRest;
                  if ($idx == $count - 1) {
                      $list[] = $rdfNil;
                      $list[] = '.';
                  }
                  else {
                      $list[] = $toBnode;
                      $list[] = '.';

                      $fromBnode = $toBnode;
                      $toBnode = $this->bnodeID();
                  }
              }
          }
          else {
              die('Only [ ] and () lists are supported!');
          }
      }
      return $list;
  }
  913.  
  /**
   * Main work-horse function: converts an N3 string into a list of triples
   * (arrays of strings).
   *
   * Pipeline: tokenize and drop whitespace, extract the prefix
   * declarations, apply prefixes / keywords / string formatting, rename
   * blank node labels, group and expand bracketed lists, split into
   * statements and finally turn every statement into triples. With
   * $this->debug set, the token list is dumped after each stage.
   *
   * @param string $s
   * @returns array
   * @access private
   */
  function n3tolist($s) {
      $debug = $this->debug;

      // tokenize the stream, and filter whitespace tokens
      $tokens = $this->filterWs($this->toke($s));
      if ($debug) {
          print "Filter WS:\n";
          var_dump($tokens);
      }

      // get the prefix directives, and add to a dict
      list($prefixes, $tokens) = $this->getPrefixes($tokens);
      if ($debug) {
          print "Prefixes:\n";
          var_dump($prefixes);
          print "***\n";
          var_dump($tokens);
      }

      // apply prefixes, keywords, and string formatting
      $tokens = $this->applyStuff($prefixes, $tokens);
      if ($debug) {
          print "Stuff applied:\n";
          var_dump($tokens);
      }

      // fix _:a anons
      $tokens = $this->fixAnon($tokens);
      if ($debug) {
          print "Fix anon:\n";
          var_dump($tokens);
      }

      // group bracketed lists into nested arrays
      $tokens = $this->listStuff($tokens);
      if ($debug) {
          print "Lists done:\n";
          var_dump($tokens);
      }

      $tokens = $this->expandLists($tokens);
      if ($debug) {
          print "Lists applied:\n";
          var_dump($tokens);
      }

      // get all of the "statements" from the stream
      $result = array();
      foreach ($this->getStatements($tokens) as $statement) {
          foreach ($this->statementize($statement) as $triple) {
              $result[] = $triple;
          }
      }
      return $result;
  }
  973.  
  /**
   * Constructs a RAP RDFNode from a URI / literal / blank node token.
   *
   * - A token starting with a double quote becomes a Literal. Its language
   *   tag ('@...') and datatype ('^^...') are looked up in the fields of
   *   $state from offset 3 onwards.
   * - A token that is a generated blank node id becomes a BlankNode. With
   *   FixBnodes switched off, the label originally used in the document is
   *   restored where one is known.
   * - Everything else becomes a Resource.
   *
   * In all cases the first and last character of the token (quotes, angle
   * brackets or underscores) are stripped.
   *
   * @access private
   * @param string $s the token
   * @param array $state the complete statement the token belongs to
   * @returns object RDFNode
   */
  function toRDFNode($s,$state) {
      $ins=substr($s,1,-1);

      if (substr($s,0,1)=="\"") {
          $lang=NULL;
          $dtype=NULL;

          for ($i = 3; $i < count($state); $i++){
              // take the tag from the field it was found in
              // (formerly always field 3 was read)
              if (substr($state[$i],0,1)=='@') $lang=substr($state[$i],1);
              if (substr($state[$i],0,2)=='^^'){
                  $dtype=substr($state[$i],2);
                  if (substr($dtype,0,1)=='<') $dtype= substr($dtype,1,-1);
              }
          }

          if(UNIC_RDF){
              $ins=$this->str2unicode_nfc($ins);
          }
          $new_Literal=new Literal($ins,$lang);
          if (isset($dtype)) $new_Literal->setDatatype($dtype);
          return $new_Literal;
      }

      // Only a token that IS a generated id is a blank node; a plain
      // substring test would also catch URIs that merely contain the prefix.
      if (preg_match('/^_'.preg_quote(BNODE_PREFIX, '/').'[0-9]+_$/', $s)) {
          $label=array_search($s,$this->bNodeMap,true);
          if (($this->FixBnodes) OR ($label===false)) {
              return new BlankNode($ins);
          }
          // original label without its leading '_:'
          return new BlankNode(trim(substr($label,2)));
      }

      return new Resource($ins);
  }
  1013.  
  1014.  
  1015. } //end: N3Parser
  1016.  
  1017. ?>

Documentation generated on Thu, 7 Jul 2005 13:42:11 +0200 by phpDocumentor 1.3.0RC3