I have this string:
- Page 1
- Page 2
- Sub Page A
I am leaving a second answer because this one demonstrates how to do it with a single mapping (in pseudocode):
foreach //li ::
ID := string(./@id)
ParentID := string(./ancestor::li[1]/@id)
Label := normalize-space(./text()[1])
Order := count(./preceding-sibling::li)+1
Children := implode(",", ./ul/li/@id)
Because this mapping can be evaluated for each li node independently, regardless of the order in which the nodes are visited,
it is a perfect match for an Iterator. Here is the current() function:
/**
 * Maps the li element the iterator currently points at to one flat row.
 *
 * Every XPath expression is evaluated relative to that li element.
 *
 * @return array Row with the keys ID, label, order, parentID and children,
 *               in that order.
 */
// IteratorIterator::current() declares a return type as of PHP 8.1; the
// attribute avoids the deprecation notice and is read as a comment by PHP 7.
#[\ReturnTypeWillChange]
public function current() {
    return [
        // Numeric value of the element's own id attribute.
        'ID' => $this->evaluate('number(./@id)'),
        // First text node only, so the text of nested lists is not included.
        'label' => $this->evaluate('normalize-space(./text()[1])'),
        // 1-based position among the sibling li elements.
        'order' => $this->evaluate('count(./preceding-sibling::li)+1'),
        // Prefixing "0" turns the empty string of a missing ancestor into "0",
        // so top-level items get 0 instead of NaN.
        'parentID' => $this->evaluate('number(concat("0", ./ancestor::li[1]/@id))'),
        // Comma-separated ids of the direct child li elements.
        'children' => $this->implodeNodes(',', './ul/li/@id'),
    ];
}
Full example (Demo) output and code:
+----+----------------+-------+--------+----------+
| ID | LABEL          | ORDER | PARENT | CHILDREN |
+----+----------------+-------+--------+----------+
|  1 | Page 1         |     1 |      0 |          |
|  2 | Page 2         |     2 |      0 | 3,4,5    |
|  3 | Sub Page A     |     1 |      2 |          |
|  4 | Sub Page B     |     2 |      2 |          |
|  5 | Sub Page C     |     3 |      2 | 6        |
|  6 | Sub Sub Page I |     1 |      5 |          |
|  7 | Page 3         |     3 |      0 | 8        |
|  8 | Sub Page D     |     1 |      7 |          |
|  9 | Page 4         |     4 |      0 |          |
+----+----------------+-------+--------+----------+
/**
 * Iterates over every li element of an HTML document and maps each one to a
 * flat row describing its place in the (possibly nested) list.
 *
 * Each row is an array with these keys, in this order:
 *  - ID:       numeric value of the li's id attribute
 *  - label:    whitespace-normalized first text node of the li
 *  - order:    1-based position among the li's sibling li elements
 *  - parentID: id of the nearest ancestor li, or 0 when there is none
 *  - children: comma-separated ids of the direct child li elements
 */
class HtmlListIterator extends IteratorIterator
{
    /** @var DOMXPath XPath evaluator bound to the parsed document. */
    private $xpath;

    /**
     * Parses the markup and sets up iteration over all li elements.
     *
     * @param string $html HTML markup containing the list.
     */
    public function __construct($html) {
        $doc = new DOMDocument();
        $doc->loadHTML($html);
        $this->xpath = new DOMXPath($doc);
        // The wrapped iterator is the node list of every li in the document.
        parent::__construct($this->xpath->query('//li'));
    }

    /**
     * Evaluates an XPath expression with the current li as context node.
     *
     * @param string $expression XPath expression, relative to the current li.
     * @return mixed Typed result where possible, otherwise a DOMNodeList.
     */
    private function evaluate($expression) {
        return $this->xpath->evaluate($expression, parent::current());
    }

    /**
     * Joins the node values selected by a node-set expression.
     *
     * @param string $glue       Separator placed between the values.
     * @param string $expression XPath expression selecting nodes, relative to
     *                           the current li.
     * @return string The joined values; empty string when nothing matches.
     */
    private function implodeNodes($glue, $expression) {
        $values = [];
        // evaluate() already supplies the context node, so only the
        // expression is passed.
        foreach ($this->evaluate($expression) as $node) {
            $values[] = $node->nodeValue;
        }
        return implode($glue, $values);
    }

    /**
     * Maps the li element the iterator currently points at to one flat row.
     *
     * @return array Row with the keys ID, label, order, parentID and children.
     */
    // IteratorIterator::current() declares a return type as of PHP 8.1; the
    // attribute avoids the deprecation notice and is read as a comment by PHP 7.
    #[\ReturnTypeWillChange]
    public function current() {
        return [
            'ID' => $this->evaluate('number(./@id)'),
            // First text node only, so the text of nested lists is not included.
            'label' => $this->evaluate('normalize-space(./text()[1])'),
            'order' => $this->evaluate('count(./preceding-sibling::li)+1'),
            // Prefixing "0" turns the empty string of a missing ancestor into
            // "0", so top-level items get 0 instead of NaN.
            'parentID' => $this->evaluate('number(concat("0", ./ancestor::li[1]/@id))'),
            'children' => $this->implodeNodes(',', './ul/li/@id'),
        ];
    }
}
// Run the demo: wrap the markup in $html in the iterator and print one table
// row per li element.
// NOTE(review): $html is not defined in the visible code - presumably it is
// assigned earlier in the script; confirm.
print_result(new HtmlListIterator($html));
/**
 * Prints the given rows as a fixed-width ASCII table.
 *
 * @param iterable $result Rows to print. Each row is an array supplying, in
 *                         this order: ID, label, order, parent ID, children.
 */
function print_result($result) {
    $border = "+----+----------------+-------+--------+----------+\n";
    // Cell widths (2, 14, 5, 6, 8) are the dash counts of $border minus one
    // space of padding on each side, so header, rows and borders line up.
    $headFormat = "| %-2s | %-14s | %-5s | %-6s | %-8s |\n";
    $rowFormat = "| %2d | %-14s | %5d | %6d | %-8s |\n";

    echo $border;
    printf($headFormat, 'ID', 'LABEL', 'ORDER', 'PARENT', 'CHILDREN');
    echo $border;
    foreach ($result as $line) {
        // Values are consumed positionally, so the row's element order matters.
        vprintf($rowFormat, $line);
    }
    echo $border;
}