parsing working
This commit is contained in:
parent
44a06ef9fb
commit
049d847fa9
15 changed files with 384 additions and 38 deletions
|
@ -79,7 +79,7 @@ class ContainerGroup implements ContainerInterface, NodeInterface
|
|||
|
||||
public function addChild(NodeInterface|Stringable|string $child, bool $prepend = false, bool $skip_sanitize = false): static
|
||||
{
|
||||
if ($this->willAccept($child, false)) {
|
||||
if ($this->willAccept($child)) {
|
||||
$this->doAddChild($child, $prepend, $skip_sanitize);
|
||||
$this->enforceChildLimit($prepend);
|
||||
}
|
||||
|
@ -88,7 +88,7 @@ class ContainerGroup implements ContainerInterface, NodeInterface
|
|||
|
||||
public function addChildAfter(NodeInterface|Stringable|string $new_child, NodeInterface|Stringable|string $after_child, bool $skip_sanitize = false): static
|
||||
{
|
||||
if ($this->willAccept($new_child, false)) {
|
||||
if ($this->willAccept($new_child)) {
|
||||
$this->doAddChildAfter($new_child, $after_child, $skip_sanitize);
|
||||
$this->enforceChildLimit(false);
|
||||
}
|
||||
|
@ -97,7 +97,7 @@ class ContainerGroup implements ContainerInterface, NodeInterface
|
|||
|
||||
public function addChildBefore(NodeInterface|Stringable|string $new_child, NodeInterface|Stringable|string $before_child, bool $skip_sanitize = false): static
|
||||
{
|
||||
if ($this->willAccept($new_child, false)) {
|
||||
if ($this->willAccept($new_child)) {
|
||||
$this->doAddChildBefore($new_child, $before_child, $skip_sanitize);
|
||||
$this->enforceChildLimit(true);
|
||||
}
|
||||
|
@ -118,13 +118,8 @@ class ContainerGroup implements ContainerInterface, NodeInterface
|
|||
}
|
||||
}
|
||||
|
||||
public function willAccept(NodeInterface|Stringable|string $child, bool $check_limit = true): bool
|
||||
public function willAccept(NodeInterface|Stringable|string $child): bool
|
||||
{
|
||||
if ($check_limit && $this->limit > 0) {
|
||||
if (count($this->children()) >= $this->limit) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if ($child instanceof NodeInterface) {
|
||||
$child = $child->detachCopy();
|
||||
}
|
||||
|
|
|
@ -20,7 +20,7 @@ class HtmlTag extends AbstractGroupedTag implements HtmlTagInterface
|
|||
public function __construct()
|
||||
{
|
||||
parent::__construct();
|
||||
$this->head = ContainerGroup::ofClass(HeadTagInterface::class);
|
||||
$this->head = ContainerGroup::ofClass(HeadTagInterface::class, 1);
|
||||
$this->body = ContainerGroup::ofClass(BodyTagInterface::class, 1);
|
||||
$this->addGroup($this->head);
|
||||
$this->addGroup($this->body);
|
||||
|
|
|
@ -2,31 +2,18 @@
|
|||
|
||||
namespace ByJoby\HTML\Containers\DocumentTags;
|
||||
|
||||
use ByJoby\HTML\Traits\NodeTrait;
|
||||
use Exception;
|
||||
use ByJoby\HTML\Tags\AbstractContentTag;
|
||||
use Stringable;
|
||||
|
||||
class TitleTag implements TitleTagInterface
|
||||
class TitleTag extends AbstractContentTag implements TitleTagInterface
|
||||
{
|
||||
const TAG = 'title';
|
||||
|
||||
use NodeTrait;
|
||||
|
||||
/** @var string */
|
||||
protected $title = 'Untitled';
|
||||
protected $content = 'Untitled';
|
||||
|
||||
public function setTitle(string $title): static
|
||||
public function setContent(string|Stringable $content): static
|
||||
{
|
||||
$this->title = trim(strip_tags($title));
|
||||
parent::setContent(trim(strip_tags($content)));
|
||||
return $this;
|
||||
}
|
||||
|
||||
public function title(): string
|
||||
{
|
||||
return $this->title;
|
||||
}
|
||||
|
||||
public function __toString(): string
|
||||
{
|
||||
return '<title>' . $this->title() . '</title>';
|
||||
}
|
||||
}
|
||||
|
|
|
@ -2,10 +2,8 @@
|
|||
|
||||
namespace ByJoby\HTML\Containers\DocumentTags;
|
||||
|
||||
use ByJoby\HTML\NodeInterface;
|
||||
use ByJoby\HTML\Tags\ContentTagInterface;
|
||||
|
||||
interface TitleTagInterface extends NodeInterface
|
||||
interface TitleTagInterface extends ContentTagInterface
|
||||
{
|
||||
public function title(): string;
|
||||
public function setTitle(string $title): static;
|
||||
}
|
||||
|
|
|
@ -30,6 +30,14 @@ class Classes implements Countable
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Parse the value of an HTML class attribute and add every class in it.
 *
 * Tokens are separated by any run of whitespace (spaces, tabs, line
 * breaks), so attribute values that were wrapped across lines in the
 * markup are tokenised correctly. Empty tokens are discarded; every
 * remaining token, including the literal "0", is handed to add().
 *
 * @param string $class_string raw class attribute value
 */
public function parse(string $class_string): void
{
    // PREG_SPLIT_NO_EMPTY drops the empty strings produced by leading,
    // trailing or repeated whitespace, so no separate trim/filter is needed.
    $tokens = preg_split('/\s+/', $class_string, -1, PREG_SPLIT_NO_EMPTY);
    // preg_split() returns false only on a regex engine failure
    foreach ($tokens ?: [] as $class) {
        $this->add($class);
    }
}
|
||||
|
||||
public function count(): int
|
||||
{
|
||||
return count($this->classes);
|
||||
|
|
|
@ -35,6 +35,14 @@ class Styles implements Countable, ArrayAccess, Stringable
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Parse an inline CSS declaration list (e.g. the value of a style
 * attribute) and assign each declaration through this object's array
 * access.
 *
 * Declarations are separated by ";". Each one is split into property and
 * value at the FIRST ":" only, so values that themselves contain a colon
 * (such as url(https://example.com/a.png)) are kept whole instead of
 * being silently dropped. Chunks that contain no ":" are skipped.
 *
 * Known limitation: a ";" inside a quoted string or url() still ends the
 * declaration, because the list is split on every ";".
 *
 * @param string $css_string raw declaration list
 */
public function parse(string $css_string): void
{
    foreach (explode(';', $css_string) as $declaration) {
        // limit of 2 keeps any further colons inside the value part
        $parts = explode(':', trim($declaration), 2);
        if (count($parts) !== 2) {
            continue;
        }
        [$property, $value] = $parts;
        // property and value are passed on as-is; any further
        // normalisation is left to the array-access setter
        $this[$property] = $value;
    }
}
|
||||
|
||||
public function count(): int
|
||||
{
|
||||
return count($this->styles);
|
||||
|
|
12
src/Html5/Tags/DivTag.php
Normal file
12
src/Html5/Tags/DivTag.php
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace ByJoby\HTML\Html5\Tags;
|
||||
|
||||
use ByJoby\HTML\ContentCategories\SectioningContent;
|
||||
use ByJoby\HTML\DisplayTypes\DisplayBlock;
|
||||
use ByJoby\HTML\Tags\AbstractContainerTag;
|
||||
|
||||
/**
 * Tag class for the `<div>` element.
 *
 * NOTE(review): the HTML specification lists <div> as flow/palpable content,
 * not sectioning content (article, aside, nav, section) -- confirm that
 * implementing SectioningContent here is intentional.
 */
class DivTag extends AbstractContainerTag implements DisplayBlock, SectioningContent
{
    /** Element name for this tag class. */
    public const TAG = 'div';
}
|
12
src/Html5/Tags/PTag.php
Normal file
12
src/Html5/Tags/PTag.php
Normal file
|
@ -0,0 +1,12 @@
|
|||
<?php
|
||||
|
||||
namespace ByJoby\HTML\Html5\Tags;
|
||||
|
||||
use ByJoby\HTML\ContentCategories\SectioningContent;
|
||||
use ByJoby\HTML\DisplayTypes\DisplayBlock;
|
||||
use ByJoby\HTML\Tags\AbstractContainerTag;
|
||||
|
||||
/**
 * Tag class for the `<p>` element.
 *
 * NOTE(review): the HTML specification lists <p> as flow/palpable content,
 * not sectioning content (article, aside, nav, section) -- confirm that
 * implementing SectioningContent here is intentional.
 */
class PTag extends AbstractContainerTag implements DisplayBlock, SectioningContent
{
    /** Element name for this tag class. */
    public const TAG = 'p';
}
|
40
src/Nodes/CData.php
Normal file
40
src/Nodes/CData.php
Normal file
|
@ -0,0 +1,40 @@
|
|||
<?php
|
||||
|
||||
namespace ByJoby\HTML\Nodes;
|
||||
|
||||
use ByJoby\HTML\Traits\NodeTrait;
|
||||
use Stringable;
|
||||
|
||||
/**
 * Node that renders its value inside a `<![CDATA[ ... ]]>` section.
 */
class CData implements CDataInterface
{
    use NodeTrait;

    /**
     * @param Stringable|string $value content of the section; stored as
     *                                 given and converted to a string on read
     */
    public function __construct(protected Stringable|string $value)
    {
    }

    /**
     * Current content as a plain string.
     *
     * The explicit cast makes the Stringable-to-string conversion visible
     * and keeps this method valid even if the file is later switched to
     * strict_types, where returning an object from a `: string` method is
     * a TypeError.
     */
    public function value(): string
    {
        return (string) $this->value;
    }

    /**
     * Replace the content of the section.
     *
     * @param string|Stringable $value
     * @return static this node, for chaining
     */
    public function setValue(string|Stringable $value): static
    {
        $this->value = $value;
        return $this;
    }

    /**
     * Render the node as CDATA markup.
     *
     * The terminator "]]>" cannot be escaped inside a CDATA section, so
     * each occurrence in the value is split across two adjacent sections:
     * the first ends after "]]" and the next one starts with ">". Messy,
     * but it is the only way to carry that sequence.
     */
    public function __toString(): string
    {
        $body = str_replace(']]>', ']]]]><![CDATA[>', $this->value());
        return '<![CDATA[' . $body . ']]>';
    }
}
|
13
src/Nodes/CDataInterface.php
Normal file
13
src/Nodes/CDataInterface.php
Normal file
|
@ -0,0 +1,13 @@
|
|||
<?php
|
||||
|
||||
namespace ByJoby\HTML\Nodes;
|
||||
|
||||
use ByJoby\HTML\NodeInterface;
|
||||
use Stringable;
|
||||
|
||||
/**
 * Contract for CDATA nodes: a node holding a value that can be read back
 * as a string and replaced.
 */
interface CDataInterface extends NodeInterface
{
    /**
     * @param string|Stringable $value initial content
     */
    public function __construct(string|Stringable $value);

    /**
     * @return string the current content
     */
    public function value(): string;

    /**
     * @param string|Stringable $value new content
     * @return static
     */
    public function setValue(string|Stringable $value): static;
}
|
188
src/Parser.php
Normal file
188
src/Parser.php
Normal file
|
@ -0,0 +1,188 @@
|
|||
<?php
|
||||
|
||||
namespace ByJoby\HTML;
|
||||
|
||||
use ByJoby\HTML\Containers\Fragment;
|
||||
use ByJoby\HTML\Containers\FragmentInterface;
|
||||
use ByJoby\HTML\Containers\GenericHtmlDocument;
|
||||
use ByJoby\HTML\Containers\HtmlDocumentInterface;
|
||||
use ByJoby\HTML\Nodes\CData;
|
||||
use ByJoby\HTML\Nodes\CDataInterface;
|
||||
use ByJoby\HTML\Nodes\Comment;
|
||||
use ByJoby\HTML\Nodes\CommentInterface;
|
||||
use ByJoby\HTML\Nodes\Text;
|
||||
use ByJoby\HTML\Nodes\TextInterface;
|
||||
use ByJoby\HTML\Tags\ContentTagInterface;
|
||||
use ByJoby\HTML\Tags\TagInterface;
|
||||
use DOMComment;
|
||||
use DOMDocument;
|
||||
use DOMElement;
|
||||
use DOMNode;
|
||||
use DOMText;
|
||||
|
||||
/**
 * Builds this library's node objects from HTML source.
 *
 * The markup is loaded into a DOMDocument, the resulting DOM is walked
 * recursively, and every DOM node is converted into the matching node class.
 * Elements for which no tag class can be resolved are skipped together with
 * their descendants.
 */
class Parser
{
    /**
     * libxml flags shared by parseFragment() and parseDocument().
     */
    protected const LIBXML_FLAGS = LIBXML_BIGLINES
        | LIBXML_COMPACT
        | LIBXML_HTML_NODEFDTD
        | LIBXML_PARSEHUGE
        | LIBXML_NOERROR;

    /**
     * Namespace prefixes searched, in order, when resolving a tag name.
     *
     * @var array<int,string>
     */
    protected $tag_namespaces = [
        '\\ByJoby\\HTML\\Html5\\Tags\\',
        '\\ByJoby\\HTML\\Containers\\DocumentTags\\'
    ];

    /**
     * Explicit and cached tag-name => class mappings; consulted before the
     * namespace search and filled in by tagClass() on every hit.
     *
     * @var array<string,class-string<TagInterface>>
     */
    protected $tag_classes = [];

    /** @var class-string<CommentInterface> */
    protected $comment_class = Comment::class;

    /** @var class-string<TextInterface> */
    protected $text_class = Text::class;

    /**
     * Not referenced by any method of this class yet.
     *
     * @var class-string<CDataInterface>
     */
    protected $cdata_class = CData::class;

    /** @var class-string<HtmlDocumentInterface> */
    protected $document_class = GenericHtmlDocument::class;

    /** @var class-string<FragmentInterface> */
    protected $fragment_class = Fragment::class;

    /**
     * Parse a piece of markup that is not a complete document.
     *
     * @param string $html fragment source
     * @return FragmentInterface fragment holding the converted top-level nodes
     */
    public function parseFragment(string $html): FragmentInterface
    {
        $fragment = new ($this->fragment_class);
        $dom = new DOMDocument();
        $dom->loadHTML(
            // wrap in DIV, otherwise root-level text gets wrapped in P tags
            '<div>' . $html . '</div>',
            static::LIBXML_FLAGS | LIBXML_HTML_NOIMPLIED
        );
        // the wrapper DIV is the first node of the document; it is only used
        // as the walk root and never becomes part of the fragment
        $wrapper = $dom->childNodes->item(0);
        if ($wrapper !== null) {
            $this->walkDom($wrapper, $fragment);
        }
        return $fragment;
    }

    /**
     * Parse a complete HTML document.
     *
     * @param string $html document source; an empty string yields a freshly
     *                     constructed, otherwise untouched document object
     * @return HtmlDocumentInterface
     */
    public function parseDocument(string $html): HtmlDocumentInterface
    {
        /** @var HtmlDocumentInterface */
        $document = new ($this->document_class);
        // DOMDocument::loadHTML() rejects an empty source string, and there
        // would be nothing to walk anyway
        if ($html === '') {
            return $document;
        }
        $dom = new DOMDocument();
        $dom->loadHTML($html, static::LIBXML_FLAGS);
        $this->walkDom($dom, $document);
        return $document;
    }

    /**
     * Convert every child of $node and add it to $parent, descending into
     * converted children that are containers themselves.
     *
     * Children that cannot be converted are skipped, and so is everything
     * beneath them.
     */
    protected function walkDom(DOMNode $node, ContainerInterface $parent): void
    {
        foreach ($node->childNodes as $child) {
            $converted_child = $this->convertNode($child);
            if (!$converted_child) {
                continue;
            }
            // append converted child to parent
            $parent->addChild($converted_child);
            // walk DOM for child if it is a container
            if ($converted_child instanceof ContainerInterface) {
                $this->walkDom($child, $converted_child);
            }
        }
    }

    /**
     * Convert a single DOM node into a library node.
     *
     * Elements become tags, comments become comment nodes and text becomes
     * text nodes. DOMCdataSection extends DOMText, so CDATA sections take
     * the text branch as well.
     *
     * @return null|NodeInterface null when the node has no counterpart
     */
    protected function convertNode(DOMNode $node): null|NodeInterface
    {
        if ($node instanceof DOMElement) {
            return $this->convertNodeToTag($node);
        }
        if ($node instanceof DOMComment) {
            return new ($this->comment_class)($node->textContent);
        }
        if ($node instanceof DOMText) {
            return new ($this->text_class)($node->textContent);
        }
        // This line shouldn't be reached, but if it is it's philosophically
        // consistent to simply ignore unknown node types
        return null; // @codeCoverageIgnore
    }

    /**
     * Instantiate the tag class for an element and copy over its content
     * (for content tags) and attributes.
     *
     * @return null|NodeInterface null when no class is known for the tag name
     */
    protected function convertNodeToTag(DOMElement $node): null|NodeInterface
    {
        // build object
        $class = $this->tagClass($node->tagName);
        if (!$class) {
            return null;
        }
        $tag = new $class;
        // content tags take the element's text content directly
        if ($tag instanceof ContentTagInterface) {
            $tag->setContent($node->textContent);
        }
        // external helper methods to stay tidy
        if ($tag instanceof TagInterface) {
            $this->processAttributes($node, $tag);
        }
        return $tag;
    }

    /**
     * Copy the attributes of a DOM element onto a tag.
     *
     * `id`, `class` and `style` go through the tag's dedicated setters and
     * parsers; everything else is written to the generic attribute
     * collection. An attribute whose value is missing or empty is treated as
     * a boolean attribute and stored as `true`. Any other value, including
     * the string "0", is kept as a string.
     */
    protected function processAttributes(DOMElement $node, TagInterface $tag): void
    {
        if (!$node->attributes) {
            return;
        }
        /** @var DOMNode $attribute */
        foreach ($node->attributes as $attribute) {
            $name = (string) $attribute->nodeName;
            $raw = $attribute->nodeValue;
            // compare explicitly: a truthiness check would turn "0" into true
            $value = ($raw === null || $raw === '') ? true : $raw;

            if ($name === 'id' && is_string($value)) {
                $tag->setID($value);
            } elseif ($name === 'class' && is_string($value)) {
                $tag->classes()->parse($value);
            } elseif ($name === 'style' && is_string($value)) {
                $tag->styles()->parse($value);
            } else {
                // make an effort to set the attribute
                try {
                    $tag->attributes()[$name] = $value;
                }
                // it is correct to ignore attributes that are unsettable
                catch (\Throwable $th) { // @codeCoverageIgnore
                    // does nothing
                }
            }
        }
    }

    /**
     * Resolve the class that represents a tag name.
     *
     * An entry in $tag_classes wins. Otherwise each configured namespace is
     * tried with the class name ucfirst($tag) . 'Tag'; the first existing
     * class that implements NodeInterface is cached in $tag_classes and
     * returned. Misses are not cached.
     *
     * @param string $tag
     * @return class-string<NodeInterface>|null
     */
    protected function tagClass(string $tag): string|null
    {
        // look for an explicitly-set class
        if (isset($this->tag_classes[$tag])) {
            return $this->tag_classes[$tag];
        }
        // otherwise loop through namespaces and try to find a tag
        foreach ($this->tag_namespaces as $namespace) {
            $class = $namespace . ucfirst($tag) . 'Tag';
            if (!class_exists($class)) {
                continue;
            }
            $implements = class_implements($class) ?: [];
            if (in_array(NodeInterface::class, $implements, true)) {
                $this->tag_classes[$tag] = $class; // @phpstan-ignore-line
                return $class; // @phpstan-ignore-line
            }
        }
        // return null if nothing found
        return null;
    }
}
|
|
@ -9,9 +9,9 @@ class TitleTagTest extends TestCase
|
|||
public function testGetAndSet()
|
||||
{
|
||||
$title = new TitleTag;
|
||||
$this->assertEquals('Untitled', $title->title());
|
||||
$title->setTitle('<strong>Titled</strong>');
|
||||
$this->assertEquals('Titled', $title->title());
|
||||
$this->assertEquals('<title>Titled</title>', $title->__toString());
|
||||
$this->assertEquals('Untitled', $title->content());
|
||||
$title->setContent('<strong>Titled</strong>');
|
||||
$this->assertEquals('Titled', $title->content());
|
||||
$this->assertEquals('<title>' . PHP_EOL . 'Titled' . PHP_EOL . '</title>', $title->__toString());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ class GenericHtmlDocumentTest extends TestCase
|
|||
'<!DOCTYPE html>',
|
||||
'<html>',
|
||||
'<head>',
|
||||
'<title>Untitled</title>',
|
||||
'<title>', 'Untitled', '</title>',
|
||||
'</head>',
|
||||
'<body></body>',
|
||||
'</html>'
|
||||
|
|
24
tests/Nodes/CDataTest.php
Normal file
24
tests/Nodes/CDataTest.php
Normal file
|
@ -0,0 +1,24 @@
|
|||
<?php
|
||||
|
||||
namespace ByJoby\HTML\Nodes;
|
||||
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
/**
 * Tests for rendering and mutating CData nodes.
 */
class CDataTest extends TestCase
{
    /**
     * Rendering wraps the value in a CDATA section and splits any embedded
     * "]]>" terminator across two sections.
     */
    public function testSimpleText(): void
    {
        // compare explicit strings so the assertion does not depend on
        // PHPUnit coercing an object to a string
        $this->assertSame('<![CDATA[]]>', (string) new CData(''));
        $this->assertSame('<![CDATA[foo]]>', (string) new CData('foo'));
        $this->assertSame('<![CDATA[foo-bar]]>', (string) new CData('foo-bar'));
        $this->assertSame('<![CDATA[foo]]]]><![CDATA[>bar]]>', (new CData('foo]]>bar'))->__toString());
    }

    /**
     * setValue() replaces what value() returns.
     */
    public function testModification(): void
    {
        $cdata = new CData('foo');
        $this->assertSame('foo', $cdata->value());
        $cdata->setValue('bar');
        $this->assertSame('bar', $cdata->value());
    }

    /**
     * A Stringable value is accepted and read back as a plain string.
     */
    public function testStringableValue(): void
    {
        $stringable = new class implements \Stringable {
            public function __toString(): string
            {
                return 'baz';
            }
        };
        $cdata = new CData($stringable);
        $this->assertSame('baz', $cdata->value());
        $this->assertSame('<![CDATA[baz]]>', $cdata->__toString());
    }
}
|
61
tests/ParserTest.php
Normal file
61
tests/ParserTest.php
Normal file
|
@ -0,0 +1,61 @@
|
|||
<?php
|
||||
|
||||
namespace ByJoby\HTML;
|
||||
|
||||
use ByJoby\HTML\Html5\Tags\DivTag;
|
||||
use ByJoby\HTML\Nodes\TextInterface;
|
||||
use PHPUnit\Framework\TestCase;
|
||||
|
||||
/**
 * Tests for Parser::parseFragment() and Parser::parseDocument().
 */
class ParserTest extends TestCase
{
    /**
     * Text at the root of a fragment stays a text node and is not wrapped
     * in another element.
     */
    public function testFragmentRootTextNotWrapped(): void
    {
        $parser = new Parser();

        $fragment = $parser->parseFragment('foobar');
        $this->assertInstanceOf(TextInterface::class, $fragment->children()[0]);

        $fragment = $parser->parseFragment('foobar<div>fizzbuzz</div>');
        $children = $fragment->children();
        $this->assertInstanceOf(TextInterface::class, $children[0]);
        $this->assertInstanceOf(DivTag::class, $children[1]);
    }

    /**
     * The id and arbitrary attributes are carried over onto the tag.
     */
    public function testAttributes(): void
    {
        $parser = new Parser();
        $fragment = $parser->parseFragment('<div id="foo" a="b" c="d" f></div>');
        $div = $fragment->children()[0];
        $this->assertEquals('foo', $div->id());
        $this->assertEquals('b', $div->attributes()['a']);
        $this->assertEquals('d', $div->attributes()['c']);
    }

    /**
     * class and style attributes are parsed into the tag's class and style
     * collections, tolerating stray whitespace.
     */
    public function testStylesAndClasses(): void
    {
        $parser = new Parser();
        $fragment = $parser->parseFragment('<div class="foo bar " style=" color:red; background-color: blue;"></div>');
        $div = $fragment->children()[0];
        $this->assertEquals(['bar', 'foo'], $div->classes()->getArray());
        $this->assertEquals(['background-color' => 'blue', 'color' => 'red'], $div->styles()->getArray());
    }

    /**
     * Nested elements, text and comments all end up as children of the
     * right parent.
     */
    public function testNesting(): void
    {
        $parser = new Parser();
        $fragment = $parser->parseFragment('<div><p>foo<!-- comment -->bar</p><p>foo</p></div>');
        $div = $fragment->children()[0];
        $this->assertInstanceOf(DivTag::class, $div);
        $this->assertCount(2, $div->children());
        $this->assertCount(3, $div->children()[0]->children());
    }

    /**
     * Elements without a known tag class are left out of the result.
     */
    public function testUnknownTags(): void
    {
        $parser = new Parser();
        $fragment = $parser->parseFragment('<div></div><derp><darp>');
        $this->assertCount(1, $fragment->children());
    }

    /**
     * A full document is parsed into its title and body content.
     */
    public function testParseDocument(): void
    {
        $parser = new Parser();
        $document = $parser->parseDocument('<html><head><title>Title</title></head><body><div>foo</div></body></html>');
        $this->assertEquals('Title', $document->html()->head()->title()->content());
        $this->assertEquals('<div>' . PHP_EOL . 'foo' . PHP_EOL . '</div>', $document->body()->children()[0]->__toString());
    }
}
|
Loading…
Reference in a new issue