From 51de2d6f649f6d9d8bd6c8db1a1bfe8825fab17a Mon Sep 17 00:00:00 2001 From: Joby Elliott Date: Fri, 8 Sep 2023 23:03:13 +0000 Subject: [PATCH] more comments, improved boolean attributes --- src/AbstractParser.php | 39 +-- src/Helpers/Attributes.php | 59 +++- src/Html5/Enums/BooleanAttribute.php | 16 + src/Html5/Enums/ReferrerPolicy_script.php | 53 +++ src/Html5/Enums/Type_script.php | 38 +++ src/Html5/Tags/BaseTag.php | 17 +- src/Html5/Tags/LinkTag.php | 68 ++-- src/Html5/Tags/NoscriptTag.php | 8 +- src/Html5/Tags/ScriptTag.php | 315 +++++++++++++++--- src/Html5/Tags/StyleTag.php | 90 ++++- src/Html5/TextContentTags/BlockquoteTag.php | 46 ++- src/Html5/TextContentTags/DdTag.php | 7 +- src/Html5/TextContentTags/DivTag.php | 9 +- src/Html5/TextContentTags/DlTag.php | 9 +- src/Html5/TextContentTags/DtTag.php | 12 +- src/Html5/TextContentTags/FigcaptionTag.php | 7 +- src/Html5/TextContentTags/FigureTag.php | 16 +- src/Html5/TextContentTags/HrTag.php | 8 +- src/Html5/TextContentTags/LiTag.php | 17 +- src/Html5/TextContentTags/MenuTag.php | 9 +- src/Html5/TextContentTags/OlTag.php | 26 +- src/Traits/TagTrait.php | 11 +- tests/Containers/FragmentTest.php | 11 +- tests/Helpers/AttributesTest.php | 167 +++++++++- tests/Html5/Tags/LinkTagTest.php | 50 ++- tests/Html5/Tags/MetaTagTest.php | 64 +++- tests/Html5/Tags/ScriptTagTest.php | 12 +- tests/Html5/TextContentTags/FigureTagTest.php | 4 +- tests/Html5/TextContentTags/OlTagTest.php | 2 +- tests/Tags/AbstractContainerTagTest.php | 9 +- tests/Tags/AbstractTagTest.php | 11 +- 31 files changed, 944 insertions(+), 266 deletions(-) create mode 100644 src/Html5/Enums/BooleanAttribute.php create mode 100644 src/Html5/Enums/ReferrerPolicy_script.php create mode 100644 src/Html5/Enums/Type_script.php diff --git a/src/AbstractParser.php b/src/AbstractParser.php index 28cea59..3c37b9a 100644 --- a/src/AbstractParser.php +++ b/src/AbstractParser.php @@ -5,6 +5,7 @@ namespace ByJoby\HTML; use ByJoby\HTML\Containers\Fragment; use ByJoby\HTML\Containers\FragmentInterface; use ByJoby\HTML\Containers\HtmlDocumentInterface; +use ByJoby\HTML\Html5\Enums\BooleanAttribute; use ByJoby\HTML\Nodes\CData; use ByJoby\HTML\Nodes\CDataInterface; use ByJoby\HTML\Nodes\Comment; @@ -66,33 +67,34 @@ abstract class AbstractParser public function parseFragment(string $html): FragmentInterface { - $fragment = new ($this->fragment_class); + $fragment = new($this->fragment_class); $dom = new DOMDocument(); $dom->loadHTML( '
' . $html . '
', // wrap in DIV otherwise it will wrap root-level text in P tags LIBXML_BIGLINES - | LIBXML_COMPACT - | LIBXML_HTML_NOIMPLIED - | LIBXML_HTML_NODEFDTD - | LIBXML_PARSEHUGE - | LIBXML_NOERROR + | LIBXML_COMPACT + | LIBXML_HTML_NOIMPLIED + | LIBXML_HTML_NODEFDTD + | LIBXML_PARSEHUGE + | LIBXML_NOERROR ); - $this->walkDom($dom->childNodes[0], $fragment); + // @phpstan-ignore-next-line we actually do know there's an item zero + $this->walkDom($dom->childNodes->item(0), $fragment); return $fragment; } public function parseDocument(string $html): HtmlDocumentInterface { /** @var HtmlDocumentInterface */ - $document = new ($this->document_class); + $document = new($this->document_class); $dom = new DOMDocument(); $dom->loadHTML( $html, LIBXML_BIGLINES - | LIBXML_COMPACT - | LIBXML_HTML_NODEFDTD - | LIBXML_PARSEHUGE - | LIBXML_NOERROR + | LIBXML_COMPACT + | LIBXML_HTML_NODEFDTD + | LIBXML_PARSEHUGE + | LIBXML_NOERROR ); $this->walkDom($dom, $document); return $document; @@ -117,11 +119,11 @@ abstract class AbstractParser if ($node instanceof DOMElement) { return $this->convertNodeToTag($node); } elseif ($node instanceof DOMComment) { - return new ($this->comment_class)($node->textContent); + return new($this->comment_class)($node->textContent); } elseif ($node instanceof DOMText) { $content = trim($node->textContent); if ($content) { - return new ($this->text_class)($content); + return new($this->text_class)($content); } } // It's philosophically consistent to simply ignore unknown node types @@ -149,18 +151,17 @@ abstract class AbstractParser protected function processAttributes(DOMElement $node, TagInterface $tag): void { - /** @var array */ $attributes = []; - // absorb attributes + // absorb attributes from DOMNode /** @var DOMNode $attribute */ foreach ($node->attributes ?? [] as $attribute) { if ($attribute->nodeValue) { $attributes[$attribute->nodeName] = $attribute->nodeValue; } else { - $attributes[$attribute->nodeName] = true; + $attributes[$attribute->nodeName] = BooleanAttribute::true; } } - // set attributes + // set attributes internally foreach ($attributes as $k => $v) { if ($k == 'id' && is_string($v)) { $tag->setID($v); @@ -205,4 +206,4 @@ abstract class AbstractParser // return null if nothing found return null; } -} +} \ No newline at end of file diff --git a/src/Helpers/Attributes.php b/src/Helpers/Attributes.php index 17ce3d5..79963e8 100644 --- a/src/Helpers/Attributes.php +++ b/src/Helpers/Attributes.php @@ -5,6 +5,7 @@ namespace ByJoby\HTML\Helpers; use ArrayAccess; use ArrayIterator; use BackedEnum; +use ByJoby\HTML\Html5\Enums\BooleanAttribute; use Exception; use IteratorAggregate; use Stringable; @@ -13,12 +14,12 @@ use Traversable; /** * Holds and validates a set of HTML attribute name/value pairs for use in tags. * - * @implements ArrayAccess - * @implements IteratorAggregate + * @implements ArrayAccess + * @implements IteratorAggregate */ class Attributes implements IteratorAggregate, ArrayAccess { - /** @var array */ + /** @var array */ protected $array = []; /** @var bool */ protected $sorted = true; @@ -26,7 +27,7 @@ class Attributes implements IteratorAggregate, ArrayAccess protected $disallowed = []; /** - * @param null|array $array + * @param null|array $array * @param array $disallowed * @return void */ @@ -69,16 +70,18 @@ class Attributes implements IteratorAggregate, ArrayAccess } /** - * Set a value as a stringable enum array, automatically converting from a single enum or normal array of enums. + * Set a value as an array of enums, which will be internally saved as a + * string separated by $separator. An array of Enum values can also be + * retrieved using asEnumArray(). * * @template T of BackedEnum * @param string $offset - * @param null|BackedEnum|StringableEnumArray|array $value + * @param null|BackedEnum|array $value * @param class-string $enum_class * @param string $separator * @return static */ - public function setEnumArray(string $offset, null|BackedEnum|StringableEnumArray|array $value, string $enum_class, string $separator): static + public function setEnumArray(string $offset, null|BackedEnum|array $value, string $enum_class, string $separator): static { if (is_null($value)) { $value = []; @@ -94,7 +97,9 @@ class Attributes implements IteratorAggregate, ArrayAccess } /** - * Returns a given offset's value as an array of enums. + * Returns a given offset's value as an array of enums. Note that this + * method always returns an array, it will simply be empty for empty + * attributes, unset attributes, or attributes with no valid values in them. * * @template T of BackedEnum * @param string $offset @@ -104,12 +109,31 @@ class Attributes implements IteratorAggregate, ArrayAccess */ public function asEnumArray(string $offset, string $enum_class, string $separator): array { - $value = strval($this->offsetGet($offset)); + $value = $this->offsetGet($offset); + // short circuit if value is a boolean attribute + if ($value instanceof BooleanAttribute) { + return []; + } + // process as string + $value = strval($value); $value = explode($separator, $value); - $value = array_map( - $enum_class::tryFrom(...), - $value - ); + if (!$enum_class::cases()) { + // short-circuit if there are no cases in the enum + return []; + } elseif (is_string($enum_class::cases()[0]->value)) { + // look at string values only + $value = array_map( + fn(string|int $e) => $enum_class::tryFrom(strval($e)), + $value + ); + } else { + // look at int values only + $value = array_map( + fn(string|int $e) => $enum_class::tryFrom(intval($e)), + $value + ); + } + // filter and return $value = array_filter( $value, fn($e) => !empty($e) @@ -126,6 +150,9 @@ class Attributes implements IteratorAggregate, ArrayAccess public function asString(string $offset): null|string|Stringable { $value = $this->offsetGet($offset); + if (is_numeric($value)) { + $value = strval($value); + } if ($value instanceof Stringable || is_string($value)) { return $value; } else { @@ -142,8 +169,8 @@ class Attributes implements IteratorAggregate, ArrayAccess public function asInt(string $offset): null|int { $value = $this->asNumber($offset); - if (is_int($value)) { - return $value; + if (is_numeric($value)) { + return intval($value); } else { return null; } @@ -216,7 +243,7 @@ class Attributes implements IteratorAggregate, ArrayAccess } /** - * @return array + * @return array */ public function getArray(): array { diff --git a/src/Html5/Enums/BooleanAttribute.php b/src/Html5/Enums/BooleanAttribute.php new file mode 100644 index 0000000..f1f206c --- /dev/null +++ b/src/Html5/Enums/BooleanAttribute.php @@ -0,0 +1,16 @@ +. Any attribute set to + * BooleanAttribute::false will not render. + */ +enum BooleanAttribute { + /** Render an attribute with no value */ + case true; + /** Do not render this attribute */ + case false; +} \ No newline at end of file diff --git a/src/Html5/Enums/ReferrerPolicy_script.php b/src/Html5/Enums/ReferrerPolicy_script.php new file mode 100644 index 0000000..c4511e7 --- /dev/null +++ b/src/Html5/Enums/ReferrerPolicy_script.php @@ -0,0 +1,53 @@ + elements. + * + * Description by Mozilla Contributors licensed under CC-BY-SA 2.5 + * https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script + */ +enum ReferrerPolicy_script: string +{ + /** + * (default): Send a full URL when performing a same-origin request, only + * send the origin when the protocol security level stays the same + * (HTTPS→HTTPS), and send no header to a less secure destination + * (HTTPS→HTTP). + */ + case strictOriginWhenCrossOrigin = "strict-origin-when-cross-origin"; + /** + * means that the Referer header will not be sent. + */ + case noReferrer = "no-referrer"; + /** + * The sent referrer will be limited to the origin of the referring page: + * its scheme, host, and port. + */ + case origin = "origin"; + /** + * The referrer sent to other origins will be limited to the scheme, the + * host, and the port. Navigations on the same origin will still include the + * path. + */ + case originWhenCrossOrigin = "origin-when-cross-origin"; + /** + * A referrer will be sent for same origin, but cross-origin requests will + * contain no referrer information. + */ + case sameOrigin = "same-origin"; + /** + * Only send the origin of the document as the referrer when the protocol + * security level stays the same (HTTPS→HTTPS), but don't send it to a less + * secure destination (HTTPS→HTTP). + */ + case strictOrigin = "strict-origin"; + /** + * The referrer will include the origin and the path (but not the fragment, + * password, or username). This value is unsafe, because it leaks origins + * and paths from TLS-protected resources to insecure origins. + */ + case unsafeUrl = "unsafe-url"; +} \ No newline at end of file diff --git a/src/Html5/Enums/Type_script.php b/src/Html5/Enums/Type_script.php new file mode 100644 index 0000000..43fda17 --- /dev/null +++ b/src/Html5/Enums/Type_script.php @@ -0,0 +1,38 @@ + element indicates the type of script + * represented by the element: a classic script, a JavaScript module, an import + * map, or a data block. + * + * Descriptions by Mozilla Contributors licensed under CC-BY-SA 2.5 + * https://developer.mozilla.org/en-US/docs/Web/HTML/Element/script/type + */ +enum Type_script: string +{ + /** + * Indicates that the script is a "classic script", containing JavaScript + * code. Authors are encouraged to omit the attribute if the script refers + * to JavaScript code rather than specify a MIME type. JavaScript MIME types + * are listed in the IANA media types specification. + * + * Equivalent to the attribute being unset. + */ + case default = "text/javascript"; + /** + * This value causes the code to be treated as a JavaScript module. The + * processing of the script contents is deferred. The charset and defer + * attributes have no effect. For information on using module, see our + * JavaScript modules guide. Unlike classic scripts, module scripts require + * the use of the CORS protocol for cross-origin fetching. + */ + case module = "module"; + /** + * This value indicates that the body of the element contains an import map. + * The import map is a JSON object that developers can use to control how + * the browser resolves module specifiers when importing JavaScript modules + */ + case importMap = "importmap"; +} \ No newline at end of file diff --git a/src/Html5/Tags/BaseTag.php b/src/Html5/Tags/BaseTag.php index 3305ee3..3142251 100644 --- a/src/Html5/Tags/BaseTag.php +++ b/src/Html5/Tags/BaseTag.php @@ -26,9 +26,9 @@ class BaseTag extends AbstractTag implements MetadataContent * Absolute and relative URLs are allowed. data: and javascript: URLs are * not allowed. * - * @return null|string + * @return null|string|Stringable */ - public function href(): null|string + public function href(): null|string|Stringable { return $this->attributes()->asString('href'); } @@ -38,16 +38,13 @@ class BaseTag extends AbstractTag implements MetadataContent * Absolute and relative URLs are allowed. data: and javascript: URLs are * not allowed. * - * @param null|string $href + * @param null|string|Stringable $href * @return static */ - public function setHref(null|string $href): static + public function setHref(null|string|Stringable $href): static { - if (!$href) { - $this->attributes()['href'] = false; - } else { - $this->attributes()['href'] = $href; - } + if ($href) $this->attributes()['href'] = $href; + else $this->unsetHref(); return $this; } @@ -88,7 +85,7 @@ class BaseTag extends AbstractTag implements MetadataContent public function setTarget(null|string|Stringable|BrowsingContext $target): static { if (!$target) { - $this->attributes()['target'] = false; + $this->unsetTarget(); } elseif ($target instanceof BrowsingContext) { $this->attributes()['target'] = $target->value; } else { diff --git a/src/Html5/Tags/LinkTag.php b/src/Html5/Tags/LinkTag.php index 2ca249f..c2cbec9 100644 --- a/src/Html5/Tags/LinkTag.php +++ b/src/Html5/Tags/LinkTag.php @@ -47,17 +47,17 @@ class LinkTag extends AbstractTag implements MetadataContent * * if $as is As_link::fetch then $crossorigin must be specified * - * @param null|Rel_link|StringableEnumArray|array $rel + * @param null|Rel_link|array $rel * @param null|As_link|null $as * @param null|CrossOrigin|null $crossorigin * @return static */ - public function setRel(null|Rel_link|StringableEnumArray|array $rel, null|As_link $as = null, null|CrossOrigin $crossorigin = null): static + public function setRel(null|Rel_link|array $rel, null|As_link $as = null, null|CrossOrigin $crossorigin = null): static { if (!$rel) { - $this->attributes()['rel'] = false; + $this->unsetRel(); } else { - $this->attributes()->setEnumArray('rel',$rel,Rel_link::class,' '); + $this->attributes()->setEnumArray('rel', $rel, Rel_link::class, ' '); // check if new value includes Rel_link::preload and require $as if so $rel = $this->rel(); if (in_array(Rel_link::preload, $rel)) { @@ -116,7 +116,7 @@ class LinkTag extends AbstractTag implements MetadataContent public function setAs(null|As_link $as, null|CrossOrigin $crossorigin = null): static { if (!$as) { - $this->attributes()['as'] = false; + $this->unsetAs(); } else { $this->attributes()['as'] = $as->value; // check if we just set as to As_link::fetch and require $crossorigin if so @@ -171,7 +171,7 @@ class LinkTag extends AbstractTag implements MetadataContent public function setCrossorigin(null|CrossOrigin $crossorigin): static { if (!$crossorigin) { - $this->attributes()['crossorigin'] = false; + $this->unsetCrossorigin(); } else { $this->attributes()['crossorigin'] = $crossorigin->value; } @@ -211,11 +211,8 @@ class LinkTag extends AbstractTag implements MetadataContent */ public function setHref(null|string|Stringable $href): static { - if (!$href) { - $this->attributes()['href'] = false; - } else { - $this->attributes()['href'] = $href; - } + if ($href) $this->attributes()['href'] = $href; + else $this->unsetHref(); return $this; } @@ -258,11 +255,8 @@ class LinkTag extends AbstractTag implements MetadataContent */ public function setHreflang(null|string|Stringable $hreflang): static { - if (!$hreflang) { - $this->attributes()['hreflang'] = false; - } else { - $this->attributes()['hreflang'] = $hreflang; - } + if ($hreflang) $this->attributes()['hreflang'] = $hreflang; + else $this->unsetHreflang(); return $this; } @@ -304,11 +298,8 @@ class LinkTag extends AbstractTag implements MetadataContent */ public function setImagesizes(null|string|Stringable $imagesizes): static { - if (!$imagesizes) { - $this->attributes()['imagesizes'] = false; - } else { - $this->attributes()['imagesizes'] = $imagesizes; - } + if ($imagesizes) $this->attributes()['imagesizes'] = $imagesizes; + else $this->unsetImagesizes(); return $this; } @@ -350,11 +341,8 @@ class LinkTag extends AbstractTag implements MetadataContent */ public function setImagesrcset(null|string|Stringable $imagesrcset): static { - if (!$imagesrcset) { - $this->attributes()['imagesrcset'] = false; - } else { - $this->attributes()['imagesrcset'] = $imagesrcset; - } + if ($imagesrcset) $this->attributes()['imagesrcset'] = $imagesrcset; + else $this->unsetImagesrcset(); return $this; } @@ -396,11 +384,8 @@ class LinkTag extends AbstractTag implements MetadataContent */ public function setIntegrity(null|string|Stringable $integrity): static { - if (!$integrity) { - $this->attributes()['integrity'] = false; - } else { - $this->attributes()['integrity'] = $integrity; - } + if ($integrity) $this->attributes()['integrity'] = $integrity; + else $this->unsetIntegrity(); return $this; } @@ -442,11 +427,8 @@ class LinkTag extends AbstractTag implements MetadataContent */ public function setMedia(null|string|Stringable $media): static { - if (!$media) { - $this->attributes()['media'] = false; - } else { - $this->attributes()['media'] = $media; - } + if ($media) $this->attributes()['media'] = $media; + else $this->unsetMedia(); return $this; } @@ -482,11 +464,8 @@ class LinkTag extends AbstractTag implements MetadataContent */ public function setReferrerpolicy(null|ReferrerPolicy_link $referrerpolicy): static { - if (!$referrerpolicy) { - $this->attributes()['referrerpolicy'] = false; - } else { - $this->attributes()['referrerpolicy'] = $referrerpolicy->value; - } + if ($referrerpolicy) $this->attributes()['referrerpolicy'] = $referrerpolicy->value; + else $this->unsetReferrerpolicy(); return $this; } @@ -533,11 +512,8 @@ class LinkTag extends AbstractTag implements MetadataContent */ public function setType(null|string|Stringable $type): static { - if (!$type) { - $this->attributes()['type'] = false; - } else { - $this->attributes()['type'] = $type; - } + if ($type) $this->attributes()['type'] = $type; + else $this->unsetType(); return $this; } diff --git a/src/Html5/Tags/NoscriptTag.php b/src/Html5/Tags/NoscriptTag.php index 6d7f540..3ac2bda 100644 --- a/src/Html5/Tags/NoscriptTag.php +++ b/src/Html5/Tags/NoscriptTag.php @@ -8,10 +8,12 @@ use ByJoby\HTML\DisplayTypes\DisplayContents; use ByJoby\HTML\Tags\AbstractContentTag; /** - * - * + * The