diff --git a/.gitattributes b/.gitattributes new file mode 100755 index 00000000..77b544ff --- /dev/null +++ b/.gitattributes @@ -0,0 +1,14 @@ +/tests export-ignore +/tests linguist-documentation +/.scrutinizer.yml export-ignore +/.travis.yml export-ignore +/.gitignore export-ignore +/CHANGELOG.md export-ignore +/CONTRIBUTING.md export-ignore +/LICENSE.md export-ignore +/README.md export-ignore +/phpunit.php export-ignore +/phpunit.xml export-ignore +/infection.json.dist export-ignore +/.phan export-ignore +/.php_cs.dist export-ignore diff --git a/.github/FUNDING.yml b/.github/FUNDING.yml new file mode 100644 index 00000000..8fe59770 --- /dev/null +++ b/.github/FUNDING.yml @@ -0,0 +1,12 @@ +# These are supported funding model platforms + +# github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] +# patreon: # Replace with a single Patreon username +# open_collective: # Replace with a single Open Collective username +# ko_fi: # Replace with a single Ko-fi username +tidelift: "packagist/paquettg/php-html-parser" +# community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry +# liberapay: # Replace with a single Liberapay username +# issuehunt: # Replace with a single IssueHunt username +# otechie: # Replace with a single Otechie username +# custom: # Replace with up to 4 custom sponsorship URLs e.g., ['link1', 'link2'] diff --git a/.gitignore b/.gitignore new file mode 100755 index 00000000..9a550fad --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +composer.phar +composer.lock +infection.log +/vendor/ +.idea/ +.php_cs.cache +*.swp diff --git a/.phan/config.php b/.phan/config.php new file mode 100755 index 00000000..8c25e9d5 --- /dev/null +++ b/.phan/config.php @@ -0,0 +1,382 @@ +=7.1" + 'target_php_version' => '7.1', + + // If enabled, missing properties will be created when + // they are first seen. 
If false, we'll report an + // error message if there is an attempt to write + // to a class property that wasn't explicitly + // defined. + 'allow_missing_properties' => false, + + // If enabled, null can be cast to any type and any + // type can be cast to null. Setting this to true + // will cut down on false positives. + 'null_casts_as_any_type' => false, + + // If enabled, allow null to be cast as any array-like type. + // + // This is an incremental step in migrating away from `null_casts_as_any_type`. + // If `null_casts_as_any_type` is true, this has no effect. + 'null_casts_as_array' => false, + + // If enabled, allow any array-like type to be cast to null. + // This is an incremental step in migrating away from `null_casts_as_any_type`. + // If `null_casts_as_any_type` is true, this has no effect. + 'array_casts_as_null' => false, + + // If enabled, scalars (int, float, bool, string, null) + // are treated as if they can cast to each other. + // This does not affect checks of array keys. See `scalar_array_key_cast`. + 'scalar_implicit_cast' => false, + + // If enabled, any scalar array keys (int, string) + // are treated as if they can cast to each other. + // E.g. `array` can cast to `array` and vice versa. + // Normally, a scalar type such as int could only cast to/from int and mixed. + 'scalar_array_key_cast' => false, + + // If this has entries, scalars (int, float, bool, string, null) + // are allowed to perform the casts listed. + // + // E.g. `['int' => ['float', 'string'], 'float' => ['int'], 'string' => ['int'], 'null' => ['string']]` + // allows casting null to a string, but not vice versa. + // (subset of `scalar_implicit_cast`) + 'scalar_implicit_partial' => [], + + // If enabled, Phan will warn if **any** type in a method invocation's object + // is definitely not an object, + // or if **any** type in an invoked expression is not a callable. + // Setting this to true will introduce numerous false positives + // (and reveal some bugs). 
+ 'strict_method_checking' => true, + + // If enabled, Phan will warn if **any** type of the object expression for a property access + // does not contain that property. + 'strict_object_checking' => true, + + // If enabled, Phan will warn if **any** type in the argument's union type + // cannot be cast to a type in the parameter's expected union type. + // Setting this to true will introduce numerous false positives + // (and reveal some bugs). + 'strict_param_checking' => true, + + // If enabled, Phan will warn if **any** type in a property assignment's union type + // cannot be cast to a type in the property's declared union type. + // Setting this to true will introduce numerous false positives + // (and reveal some bugs). + 'strict_property_checking' => true, + + // If enabled, Phan will warn if **any** type in a returned value's union type + // cannot be cast to the declared return type. + // Setting this to true will introduce numerous false positives + // (and reveal some bugs). + 'strict_return_checking' => true, + + // If true, seemingly undeclared variables in the global + // scope will be ignored. + // + // This is useful for projects with complicated cross-file + // globals that you have no hope of fixing. + 'ignore_undeclared_variables_in_global_scope' => false, + + // Set this to false to emit `PhanUndeclaredFunction` issues for internal functions that Phan has signatures for, + // but aren't available in the codebase, or from Reflection. + // (may lead to false positives if an extension isn't loaded) + // + // If this is true(default), then Phan will not warn. + // + // Even when this is false, Phan will still infer return values and check parameters of internal functions + // if Phan has the signatures. + 'ignore_undeclared_functions_with_known_signatures' => false, + + // Backwards Compatibility Checking. 
This is slow + // and expensive, but you should consider running + // it before upgrading your version of PHP to a + // new version that has backward compatibility + // breaks. + // + // If you are migrating from PHP 5 to PHP 7, + // you should also look into using + // [php7cc (no longer maintained)](https://github.com/sstalle/php7cc) + // and [php7mar](https://github.com/Alexia/php7mar), + // which have different backwards compatibility checks. + 'backward_compatibility_checks' => false, + + // If true, check to make sure the return type declared + // in the doc-block (if any) matches the return type + // declared in the method signature. + 'check_docblock_signature_return_type_match' => true, + + // If true, make narrowed types from phpdoc params override + // the real types from the signature, when real types exist. + // (E.g. allows specifying desired lists of subclasses, + // or to indicate a preference for non-nullable types over nullable types) + // + // Affects analysis of the body of the method and the param types passed in by callers. + // + // (*Requires `check_docblock_signature_param_type_match` to be true*) + 'prefer_narrowed_phpdoc_param_type' => true, + + // (*Requires `check_docblock_signature_return_type_match` to be true*) + // + // If true, make narrowed types from phpdoc returns override + // the real types from the signature, when real types exist. + // + // (E.g. allows specifying desired lists of subclasses, + // or to indicate a preference for non-nullable types over nullable types) + // + // This setting affects the analysis of return statements in the body of the method and the return types passed in by callers. + 'prefer_narrowed_phpdoc_return_type' => true, + + // If enabled, check all methods that override a + // parent method to make sure its signature is + // compatible with the parent's. + // + // This check can add quite a bit of time to the analysis. + // + // This will also check if final methods are overridden, etc. 
+ 'analyze_signature_compatibility' => true, + + // This setting maps case-insensitive strings to union types. + // + // This is useful if a project uses phpdoc that differs from the phpdoc2 standard. + // + // If the corresponding value is the empty string, + // then Phan will ignore that union type (E.g. can ignore 'the' in `@return the value`) + // + // If the corresponding value is not empty, + // then Phan will act as though it saw the corresponding UnionTypes(s) + // when the keys show up in a UnionType of `@param`, `@return`, `@var`, `@property`, etc. + // + // This matches the **entire string**, not parts of the string. + // (E.g. `@return the|null` will still look for a class with the name `the`, but `@return the` will be ignored with the below setting) + // + // (These are not aliases, this setting is ignored outside of doc comments). + // (Phan does not check if classes with these names exist) + // + // Example setting: `['unknown' => '', 'number' => 'int|float', 'char' => 'string', 'long' => 'int', 'the' => '']` + 'phpdoc_type_mapping' => [], + + // Set to true in order to attempt to detect dead + // (unreferenced) code. Keep in mind that the + // results will only be a guess given that classes, + // properties, constants and methods can be referenced + // as variables (like `$class->$property` or + // `$class->$method()`) in ways that we're unable + // to make sense of. + 'dead_code_detection' => false, + + // Set to true in order to attempt to detect unused variables. + // `dead_code_detection` will also enable unused variable detection. + // + // This has a few known false positives, e.g. for loops or branches. + 'unused_variable_detection' => true, + + // Set to true in order to attempt to detect redundant and impossible conditions. + // + // This has some false positives involving loops, + // variables set in branches of loops, and global variables. 
+ 'redundant_condition_detection' => true, + + // If enabled, Phan will act as though it's certain of real return types of a subset of internal functions, + // even if those return types aren't available in reflection (real types were taken from php 7.3 or 8.0-dev, depending on target_php_version). + // + // Note that with php 7 and earlier, php would return null or false for many internal functions if the argument types or counts were incorrect. + // As a result, enabling this setting with target_php_version 8.0 may result in false positives for `--redundant-condition-detection` when codebases also support php 7.x. + 'assume_real_types_for_internal_functions' => true, + + // If true, this runs a quick version of checks that takes less + // time at the cost of not running as thorough + // of an analysis. You should consider setting this + // to true only when you wish you had more **undiagnosed** issues + // to fix in your code base. + // + // In quick-mode the scanner doesn't rescan a function + // or a method's code block every time a call is seen. + // This means that the problem here won't be detected: + // + // ```php + // false, + + // Enable or disable support for generic templated + // class types. + 'generic_types_enabled' => true, + + // Override to hardcode existence and types of (non-builtin) globals in the global scope. + // Class names should be prefixed with `\`. + // + // (E.g. `['_FOO' => '\FooClass', 'page' => '\PageClass', 'userId' => 'int']`) + 'globals_type_map' => [], + + // The minimum severity level to report on. This can be + // set to `Issue::SEVERITY_LOW`, `Issue::SEVERITY_NORMAL` or + // `Issue::SEVERITY_CRITICAL`. Setting it to only + // critical issues is a good place to start on a big + // sloppy mature code base. + 'minimum_severity' => Issue::SEVERITY_LOW, + + // Add any issue types (such as `'PhanUndeclaredMethod'`) + // to this black-list to inhibit them from being reported. 
+ 'suppress_issue_types' => [], + + // A regular expression to match files to be excluded + // from parsing and analysis and will not be read at all. + // + // This is useful for excluding groups of test or example + // directories/files, unanalyzable files, or files that + // can't be removed for whatever reason. + // (e.g. `'@Test\.php$@'`, or `'@vendor/.*/(tests|Tests)/@'`) + 'exclude_file_regex' => '@^vendor/.*/(tests?|Tests?)/@', + + // A list of files that will be excluded from parsing and analysis + // and will not be read at all. + // + // This is useful for excluding hopelessly unanalyzable + // files that can't be removed for whatever reason. + 'exclude_file_list' => [], + + // A directory list that defines files that will be excluded + // from static analysis, but whose class and method + // information should be included. + // + // Generally, you'll want to include the directories for + // third-party code (such as "vendor/") in this list. + // + // n.b.: If you'd like to parse but not analyze 3rd + // party code, directories containing that code + // should be added to the `directory_list` as well as + // to `exclude_analysis_directory_list`. + 'exclude_analysis_directory_list' => [ + 'vendor/', + ], + + // Enable this to enable checks of require/include statements referring to valid paths. + 'enable_include_path_checks' => true, + + // The number of processes to fork off during the analysis + // phase. + 'processes' => 1, + + // List of case-insensitive file extensions supported by Phan. + // (e.g. `['php', 'html', 'htm']`) + 'analyzed_file_extensions' => [ + 'php', + ], + + // You can put paths to stubs of internal extensions in this config option. + // If the corresponding extension is **not** loaded, then Phan will use the stubs instead. + // Phan will continue using its detailed type annotations, + // but load the constants, classes, functions, and classes (and their Reflection types) + // from these stub files (doubling as valid php files). 
+ // Use a different extension from php to avoid accidentally loading these. + // The `tools/make_stubs` script can be used to generate your own stubs (compatible with php 7.0+ right now) + // + // (e.g. `['xdebug' => '.phan/internal_stubs/xdebug.phan_php']`) + 'autoload_internal_extension_signatures' => [], + + // A list of plugin files to execute. + // + // Plugins which are bundled with Phan can be added here by providing their name (e.g. `'AlwaysReturnPlugin'`) + // + // Documentation about available bundled plugins can be found [here](https://github.com/phan/phan/tree/master/.phan/plugins). + // + // Alternately, you can pass in the full path to a PHP file with the plugin's implementation (e.g. `'vendor/phan/phan/.phan/plugins/AlwaysReturnPlugin.php'`) + 'plugins' => [ + 'AlwaysReturnPlugin', + 'DollarDollarPlugin', + 'DuplicateArrayKeyPlugin', + 'DuplicateExpressionPlugin', + 'PregRegexCheckerPlugin', + 'PrintfCheckerPlugin', + 'SleepCheckerPlugin', + 'UnreachableCodePlugin', + 'UseReturnValuePlugin', + 'EmptyStatementListPlugin', + 'StrictComparisonPlugin', + 'LoopVariableReusePlugin', + ], + + // A list of directories that should be parsed for class and + // method information. After excluding the directories + // defined in `exclude_analysis_directory_list`, the remaining + // files will be statically analyzed for errors. + // + // Thus, both first-party and third-party code being used by + // your application should be included in this list. + 'directory_list' => [ + 'src/PHPHtmlParser', + 'vendor/infection/infection/src', + 'vendor/mockery/mockery/library', + 'vendor/paquettg/string-encode/src', + 'vendor/phan/phan/src/Phan', + 'vendor/php-coveralls/php-coveralls/src', + 'vendor/phpunit/phpunit/src', + ], + + // A list of individual files to include in analysis + // with a path relative to the root directory of the + // project. 
+ 'file_list' => [], +]; diff --git a/.php_cs.dist b/.php_cs.dist new file mode 100644 index 00000000..2ead7195 --- /dev/null +++ b/.php_cs.dist @@ -0,0 +1,149 @@ +in('src') + ->in('tests') +; + +return PhpCsFixer\Config::create() + ->setRiskyAllowed(true) + ->setRules([ + 'array_indentation' => true, + 'array_syntax' => ['syntax' => 'short'], + 'binary_operator_spaces' => ['align_double_arrow'=>true], + 'blank_line_after_namespace' => true, + 'blank_line_after_opening_tag' => true, + 'blank_line_before_statement' => ['statements'=>['return']], + 'braces' => ['allow_single_line_closure' => false], + 'cast_spaces' => true, + 'class_attributes_separation' => ['elements'=>['method']], + 'class_definition' => ['single_line'=>true], + 'combine_consecutive_issets' => true, + 'concat_space' => ['spacing' => 'one'], + 'declare_equal_normalize' => true, + 'elseif' => true, + 'encoding' => true, + 'full_opening_tag' => true, + 'function_declaration' => true, + 'function_typehint_space' => true, + 'include' => true, + 'increment_style' => true, + 'indentation_type' => true, + 'line_ending' => true, + 'linebreak_after_opening_tag' => true, + 'lowercase_cast' => true, + 'lowercase_keywords' => true, + 'lowercase_static_reference' => true, + 'magic_constant_casing' => true, + 'magic_method_casing' => true, + 'mb_str_functions' => false, + 'method_argument_space' => true, + 'native_function_casing' => true, + 'native_function_invocation' => true, + 'native_function_type_declaration_casing' => true, + 'new_with_braces' => true, + 'no_blank_lines_after_class_opening' => true, + 'no_blank_lines_after_phpdoc' => true, + 'no_break_comment' => true, + 'no_closing_tag' => true, + 'no_empty_comment' => true, + 'no_empty_phpdoc' => true, + 'no_empty_statement' => true, + 'no_extra_blank_lines' => ['tokens'=>[ + 'curly_brace_block', + 'extra', + 'parenthesis_brace_block', + 'square_brace_block', + 'throw', + 'use', + ]], + 'no_leading_import_slash' => true, + 
'no_leading_namespace_whitespace' => true, + 'no_mixed_echo_print' => true, + 'no_multiline_whitespace_around_double_arrow' => true, + 'no_null_property_initialization' => true, + 'no_short_bool_cast' => true, + 'no_singleline_whitespace_before_semicolons' => true, + 'no_superfluous_elseif' => true, + 'no_spaces_after_function_name' => true, + 'no_spaces_around_offset' => true, + 'no_spaces_inside_parenthesis' => true, + 'no_superfluous_phpdoc_tags' => ['allow_mixed' => true, 'allow_unused_params' => true], + 'no_trailing_comma_in_list_call' => true, + 'no_trailing_comma_in_singleline_array' => true, + 'no_trailing_whitespace' => true, + 'no_trailing_whitespace_in_comment' => true, + 'no_unneeded_control_parentheses' => true, + 'no_unneeded_curly_braces' => ['namespaces' => true], + 'no_unused_imports' => true, + 'no_useless_else' => true, + 'no_useless_return' => true, + 'no_whitespace_before_comma_in_array' => true, + 'no_whitespace_in_blank_line' => true, + 'normalize_index_brace' => true, + 'not_operator_with_space' => false, + 'object_operator_without_whitespace' => true, + 'ordered_class_elements' => true, + 'ordered_imports' => true, + 'php_unit_fqcn_annotation' => true, + 'phpdoc_align' => ['tags' => [ + 'method', + 'param', + 'property', + 'property-read', + 'return', + 'throws', + 'type', + 'var', + ]], + 'phpdoc_add_missing_param_annotation' => true, + 'phpdoc_annotation_without_dot' => true, + 'phpdoc_indent' => true, + 'phpdoc_inline_tag' => true, + 'phpdoc_no_access' => true, + 'phpdoc_no_alias_tag' => false, + 'phpdoc_no_package' => true, + 'phpdoc_no_useless_inheritdoc' => true, + 'phpdoc_order' => true, + 'phpdoc_return_self_reference' => true, + 'phpdoc_scalar' => true, + 'phpdoc_separation' => true, + 'phpdoc_single_line_var_spacing' => true, + 'phpdoc_summary' => true, + 'phpdoc_to_comment' => true, + 'phpdoc_trim' => true, + 'phpdoc_trim_consecutive_blank_line_separation' => true, + 'phpdoc_types' => true, + 'phpdoc_types_order' => 
['null_adjustment' => 'always_last', 'sort_algorithm' => 'none'], + 'phpdoc_var_without_name' => true, + 'return_assignment' => true, + 'return_type_declaration' => true, + 'semicolon_after_instruction' => true, + 'simplified_null_return' => true, + 'short_scalar_cast' => true, + 'single_blank_line_at_eof' => true, + 'single_blank_line_before_namespace' => true, + 'single_class_element_per_statement' => true, + 'single_import_per_statement' => true, + 'single_line_after_imports' => true, + 'single_line_comment_style' => ['comment_types' => ['hash']], + 'single_line_throw' => true, + 'single_quote' => true, + 'single_trait_insert_per_statement' => true, + 'space_after_semicolon' => ['remove_in_empty_for_expressions'=>true], + 'standardize_increment' => true, + 'standardize_not_equals' => true, + 'switch_case_semicolon_to_colon' => true, + 'switch_case_space' => true, + 'ternary_operator_spaces' => true, + 'ternary_to_null_coalescing' => true, + 'trailing_comma_in_multiline_array' => true, + 'trim_array_spaces' => true, + 'unary_operator_spaces' => true, + 'visibility_required' => true, + 'whitespace_after_comma_in_array' => true, + 'yoda_style' => false, + ]) + ->setFinder($finder) + ->setCacheFile(__DIR__.'/.php_cs.cache') +; \ No newline at end of file diff --git a/.scrutinizer.yml b/.scrutinizer.yml new file mode 100755 index 00000000..8b3532b9 --- /dev/null +++ b/.scrutinizer.yml @@ -0,0 +1,41 @@ +filter: + paths: [src/*] + excluded_paths: [tests/*] +checks: + php: + code_rating: true + remove_extra_empty_lines: true + remove_php_closing_tag: true + remove_trailing_whitespace: true + fix_use_statements: + remove_unused: true + preserve_multiple: false + preserve_blanklines: true + order_alphabetically: true + fix_php_opening_tag: true + fix_linefeed: true + fix_line_ending: true + fix_identation_4spaces: true + fix_doc_comments: true +tools: + external_code_coverage: + timeout: 600 + runs: 3 + php_code_coverage: false + php_code_sniffer: + config: + standard: 
PSR2 + filter: + paths: ['src'] + php_loc: + enabled: true + excluded_dirs: [vendor, test] + php_cpd: + enabled: true + excluded_dirs: [vendor, test] +build: + nodes: + analysis: + tests: + override: + - php-scrutinizer-run diff --git a/.travis.yml b/.travis.yml old mode 100644 new mode 100755 index eecd4a4e..25ba270f --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,9 @@ language: php php: - - 5.4 - - 5.5 - - 5.6 - - hhvm + - 7.2 + - 7.3 + - 7.4 install: - composer self-update @@ -14,5 +13,7 @@ script: - mkdir -p build/logs - php vendor/bin/phpunit --coverage-clover build/logs/clover.xml -after_script: - - php vendor/bin/coveralls +after_success: + - travis_retry php vendor/bin/php-coveralls -v + - wget https://scrutinizer-ci.com/ocular.phar + - php ocular.phar code-coverage:upload --format=php-clover build/logs/clover.xml diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100755 index 00000000..3fbf0bb4 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,93 @@ +### Development + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## 3.1.1 + +### Changed +- Fixed issue with numbers in comments. +- Updated minimum PHP version to correct version. +- Comment tags are now self-closing when cleanup input is set to false. + +## 3.1.0 + +### Changed +- Updated to include Tidelift subscription option. +- Removed php-coveralls. +- Removed Guzzle 6 Adapter. +- Added support for Guzzle 7. + +## 3.0.1 + +### Changed +- Updated all DTOs to make them immutable. +- Updated documentation. + +## 3.0.0 + +### Added +- Support for PSR7 HTTP clients and requests for URL calls has been added. +- PHAN support has been added and all issues reported by PHAN have been fixed. +- PHP-CS-Fixer added. +- Support for html5 charset detection. +- Added the ability to match both parent and children. 
+- Added character set conversion in load. + +### Changed +- Fixed issue with \ causing an infinite loop. +- CDATA should not be altered when cleanupInput is false. +- Added tag attribute DTO. +- Cleaned up the selector logic. +- Fixed issue with greedy regex for charset detection. +- Fixed bug causing infinite loops in some cases. +- Refactored the way we handle options. Removed the magical option array. + +### Removed +- Curl interface and curl implementation have been removed. +- Removed support for the depth first search option. +- `findById()` method removed from Dom object. +- Removed `load()` method in Dom object. +- Removed support for php 7.1. + +## 2.2.1 + +### Added +- Added php_cs. +- Added support for PSR7 requests. +- Added the attribute type dto. + +## 2.2.0 + +### Added +- Added support for php 7.4. +- Added custom header support for curl request. +- Added gzip detection and decoding. +- Added additional type checking. + +### Changed +- Fixed bug with multiple selectors query. +- Updated documentation. +- Fixed issue with Dom object. + + +## 2.1.0 + +### Added +- New `removeSmartyScripts` configuration setting. Defaults to true. +- Added `declare(strict_types=1)` to all source files. +- Added new option `depthFirstSearch`. +- Deprecated option `depthFirstSearch` and marked for removal in `3.0.0`. +- Added multi class selections support. +- Added case insensitive attribute matching. +- Added new option `htmlSpecialCharsDecode`. + +### Changed +- Started using a changelog. +- Fixed bug that caused an infinite loop when no content found in tags. +- Moved the Mock object to the tests directory, where it belongs. +- Changes from `PSR-0` to `PSR-4` autoloading. +- Updated `CONTRIBUTING.md` contents. +- Updated docblocks. 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md old mode 100644 new mode 100755 index 9705a2f8..df0ac285 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,20 +2,13 @@ This page contains guidelines for contributing to the PHPHtmlParser package. Please review these guidelines before submitting any puLl requests to the package. -## Pull Requests - -The pull request process differs for new features and bugs. Before sending a pull request for a new feature, you should first create an issue with `[Proposal]` in the title. The proposal should describe the new feature, as well as implementation ideas. The proposal will then be reviewed and either approved or denied. Once a proposal is approved, a pull request may be created implementing the new feature. Pull requests which do not follow this guideline will be closed immediately. - -Pull requests for bugs may be sent without creating any proposal issue. If you believe that you know of a solution for a bug that has been filed on Github, please leave a comment detailing your proposed fix. - -### Feature Requests - -If you have an idea for a new feature you would like to see added to the package, you may create an issue on Github with `[Request]` in the title. The feature request will then be reviewed. - ## Coding Guidelines -We follow the [PSR-0](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-0.md) autoloading standard and take heavily from the [PSR-1](https://github.com/php-fig/fig-standards/blob/master/accepted/PSR-1-basic-coding-standard.md) coding standards. In addition to these standards, below is a list of other coding standards that should be followed: +We follow the [PSR-4](https://www.php-fig.org/psr/psr-4/) autoloading standard and follow the [PSR-12](https://www.php-fig.org/psr/psr-12/) coding style guide. To make it easy to comply with the coding standard we use php-cs-fixer to manage the style of the code base. Before pushing your code please ensure you run the following on your changes. 
+ +```bash +./vendor/bin/php-cs-fixer fix +``` -- Class opening `{` should be on the same line as the class name. -- Function and control structure opening `{` should be on a separate line. -- Interface names are suffixed with `Interface` (`FooInterface`) +Please ensure you comply with these standards when creating a PR to make it easy to review and merge. +Thank you. diff --git a/LICENSE.md b/LICENSE.md old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 index ca785c70..6889b079 --- a/README.md +++ b/README.md @@ -1,44 +1,60 @@ PHP Html Parser ========================== -Version 1.6.7 - [![Build Status](https://travis-ci.org/paquettg/php-html-parser.png)](https://travis-ci.org/paquettg/php-html-parser) [![Coverage Status](https://coveralls.io/repos/paquettg/php-html-parser/badge.png)](https://coveralls.io/r/paquettg/php-html-parser) +[![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/paquettg/php-html-parser/badges/quality-score.png?b=master)](https://scrutinizer-ci.com/g/paquettg/php-html-parser/?branch=master) -PHPHtmlParser is a simple, flexible, html parser which allows you to select tags using any css selector, like jQuery. The goal is to assiste in the development of tools which require a quick, easy way to scrap html, whether it's valid or not! This project was original supported by [sunra/php-simple-html-dom-parser](https://github.com/sunra/php-simple-html-dom-parser) but the support seems to have stopped so this project is my adaptation of his previous work. +PHPHtmlParser is a simple, flexible, html parser which allows you to select tags using any css selector, like jQuery. The goal is to assist in the development of tools which require a quick, easy way to scrape HTML, whether it's valid or not! Install ------- -This package can be found on [packagist](https://packagist.org/packages/paquettg/php-html-parser) and is best loaded using [composer](http://getcomposer.org/). 
We support php 5.4, 5.5, and hhvm 2.3. +Install the latest version using composer. + +```bash +$ composer require paquettg/php-html-parser +``` + +This package can be found on [packagist](https://packagist.org/packages/paquettg/php-html-parser) and is best loaded using [composer](http://getcomposer.org/). We support php 7.2, 7.3, and 7.4. -Usage +Basic Usage ----- -You can find many examples of how to use the dom parser and any of its parts (which you will most likely never touch) in the tests directory. The tests are done using PHPUnit and are very small, a few lines each, and are a great place to start. Given that, I'll still be showing a few examples of how the package should be used. The following example is a very simplistic usage of the package. +You can find many examples of how to use the DOM parser and any of its parts (which you will most likely never touch) in the tests directory. The tests are done using PHPUnit and are very small, a few lines each, and are a great place to start. Given that, I'll still be showing a few examples of how the package should be used. The following example is a very simplistic usage of the package. ```php +// Assuming you installed from Composer: +require "vendor/autoload.php"; use PHPHtmlParser\Dom; $dom = new Dom; -$dom->load('

Hey bro, click here
:)

'); +$dom->loadStr('

Hey bro, click here
:)

'); $a = $dom->find('a')[0]; echo $a->text; // "click here" ``` -The above will output "click here". Simple no? There are many ways to get the same result from the dome, such as `$dom->getElementsbyTag('a')[0]` or `$dom->find('a', 0)` which can all be found in the tests or in the code itself. +The above will output "click here". Simple, no? There are many ways to get the same result from the DOM, such as `$dom->getElementsByTag('a')[0]` or `$dom->find('a', 0)`, which can all be found in the tests or in the code itself. + +Support PHP Html Parser Financially +-------------- + +Get supported PHP Html Parser and help fund the project with the [Tidelift Subscription](https://tidelift.com/subscription/pkg/packagist-paquettg-php-html-parser?utm_source=packagist-paquettg-php-html-parser&utm_medium=referral&utm_campaign=enterprise). + +Tidelift delivers commercial support and maintenance for the open source dependencies you use to build your applications. Save time, reduce risk, and improve code health, while paying the maintainers of the exact dependencies you use. Loading Files ------------------ -You may also seamlessly load a file into the dom instead of a string, which is much more convinient and is how I except most developers will be loading the html. The following example is taken from our test and uses the "big.html" file found there. +You may also seamlessly load a file into the DOM instead of a string, which is much more convenient and is how I expect most developers will be loading the HTML. The following example is taken from our test and uses the "big.html" file found there. 
```php +// Assuming you installed from Composer: +require "vendor/autoload.php"; use PHPHtmlParser\Dom; $dom = new Dom; -$dom->loadFromFile('tests/big.html'); +$dom->loadFromFile('tests/data/big.html'); $contents = $dom->find('.content-border'); echo count($contents); // 10 @@ -56,16 +72,16 @@ foreach ($contents as $content) } ``` -This example loads the html from big.html, a real page found online, and gets all the content-border classes to process. It also shows a few things you can do with a node but it is not an exhaustive list of methods that a node has avaiable. +This example loads the html from big.html, a real page found online, and gets all the content-border classes to process. It also shows a few things you can do with a node but it is not an exhaustive list of the methods that a node has available. -Alternativly, you can always use the `load()` method to load the file. It will attempt to find the file using `file_exists` and, if succesfull, will call `loadFromFile()` for you. The same applies to a URL and `loadFromUrl()` method. - -Loading Url +Loading URLs ---------------- -Loading a url is very similar to the way you would load the html from a file. +Loading a URL is very similar to the way you would load the HTML from a file. ```php +// Assuming you installed from Composer: +require "vendor/autoload.php"; use PHPHtmlParser\Dom; $dom = new Dom; @@ -73,63 +89,114 @@ $dom->loadFromUrl('http://google.com'); $html = $dom->outerHtml; // or -$dom->load('http://google.com'); +$dom->loadFromUrl('http://google.com'); $html = $dom->outerHtml; // same result as the first example ``` -What makes the loadFromUrl method note worthy is the `PHPHtmlParser\CurlInterface` parameter, an optional second parameter. By default, we use the `PHPHtmlParser\Curl` class to get the contents of the url. On the other hand, though, you can inject your own implementation of CurlInterface and we will attempt to load the url using what ever tool/settings you want, up to you. 
+loadFromUrl will, by default, use an implementation of the `\Psr\Http\Client\ClientInterface` to do the HTTP request and a default implementation of `\Psr\Http\Message\RequestInterface` to create the body of the request. You can easily implement your own version of either the client or request to use a custom HTTP connection when using loadFromUrl. ```php +// Assuming you installed from Composer: +require "vendor/autoload.php"; use PHPHtmlParser\Dom; -use App\Services\Connector; +use App\Services\MyClient; $dom = new Dom; -$dom->loadFromUrl('http://google.com', [], new Connector); +$dom->loadFromUrl('http://google.com', null, new MyClient()); $html = $dom->outerHtml; ``` -As long as the Connector object implements the `PHPHtmlParser\CurlInterface` interface properly it will use that object to get the content of the url instead of the default `PHPHtmlParser\Curl` class. +As long as the client object implements the interface properly, it will use that object to get the content of the url. Loading Strings --------------- -Loading a string directly, with out the checks in `load()` is also easely done. +Loading a string directly is also easily done. ```php +// Assuming you installed from Composer: +require "vendor/autoload.php"; use PHPHtmlParser\Dom; $dom = new Dom; -$dom->loadStr('String', []) +$dom->loadStr('String'); $html = $dom->outerHtml; ``` -If the string is to long, depending on your file system, the `load()` method will throw a warning. If this happens you can just call the above method to bypass the `is_file()` check in the `load()` method. - Options ------- You can also set parsing option that will effect the behavior of the parsing engine. You can set a global option array using the `setOptions` method in the `Dom` object or a instance specific option by adding it to the `load` method as an extra (optional) parameter. 
```php +// Assuming you installed from Composer: +require "vendor/autoload.php"; use PHPHtmlParser\Dom; +use PHPHtmlParser\Options; $dom = new Dom; -$dom->setOptions([ - 'strict' => true, // Set a global option to enable strict html parsing. -]); +$dom->setOptions( + // this is set as the global option level. + (new Options()) + ->setStrict(true) +); -$dom->load('http://google.com', [ - 'whitespaceTextNode' => false, // Only applies to this load. -]); +$dom->loadFromUrl('http://google.com', + (new Options())->setWhitespaceTextNode(false) // only applies to this load. +); -$dom->load('http://gmail.com'); // will not have whitespaceTextNode set to false. +$dom->loadFromUrl('http://gmail.com'); // will not have whitespaceTextNode set to false. ``` -At the moment we support 3 options, strict, whitespaceTextNode and enforceEncoding. Strict, by default false, will throw a `StrickException` if it find that the html is not strict complient (all tags must have a clossing tag, no attribute with out a value, etc.). +At the moment we support 12 options. + +**Strict** + +Strict, by default false, will throw a `StrictException` if it finds that the html is not strictly compliant (all tags must have a closing tag, no attribute without a value, etc.). + +**whitespaceTextNode** The whitespaceTextNode, by default true, option tells the parser to save textnodes even if the content of the node is empty (only whitespace). Setting it to false will ignore all whitespace only text node found in the document. -The enforceEncoding, by default null, option will enforce an charater set to be used for reading the content and returning the content in that encoding. Setting it to null will trigger an attempt to figure out the encoding from within the content of the string given instead. +**enforceEncoding** + +The enforceEncoding, by default null, option will enforce a character set to be used for reading the content and returning the content in that encoding.
Setting it to null will trigger an attempt to figure out the encoding from within the content of the string given instead. + +**cleanupInput** + +Set this to `false` to skip the entire clean up phase of the parser. If this is set to `false` the next 3 options will be ignored. Defaults to `true`. + +**removeScripts** + +Set this to `false` to skip removing the script tags from the document body. This might have adverse effects. Defaults to `true`. + +**removeStyles** + +Set this to `false` to skip removing of style tags from the document body. This might have adverse effects. Defaults to `true`. + +**preserveLineBreaks** + +Preserves Line Breaks if set to `true`. If set to `false` line breaks are cleaned up as part of the input clean up process. Defaults to `false`. + +**removeDoubleSpace** + +Set this to `false` if you want to preserve whitespace inside of text nodes. It is set to `true` by default. + +**removeSmartyScripts** + +Set this to `false` if you want to preserve smarty script found in the html content. It is set to `true` by default. + +**htmlSpecialCharsDecode** + +By default this is set to `false`. Setting this to `true` will apply the php function `htmlspecialchars_decode` to all attribute values and text nodes. + +**selfClosing** + +This option contains an array of all self closing tags. These tags must be self closing and the parser will force them to be so if you have strict turned on. You can update this list with any additional tags that can be used as a self closing tag when using strict. You can also remove tags from this array or clear it out completely. + +**noSlash** + +This option contains an array of all tags that can not be self closing. The list starts off as empty but you can add elements as you wish. Static Facade ------------- @@ -139,7 +206,7 @@ You can also mount a static facade for the Dom object.
```PHP PHPHtmlParser\StaticDom::mount(); -Dom::load('tests/big.hmtl'); +Dom::loadFromFile('tests/big.html'); $objects = Dom::find('.content-border'); ``` @@ -152,8 +219,10 @@ Modifying The Dom You can always modify the dom that was created from any loading method. To change the attribute of any node you can just call the `setAttribute` method. ```php +use PHPHtmlParser\Dom; + $dom = new Dom; -$dom->load('

Hey bro, click here
:)

'); +$dom->loadStr('

Hey bro, click here
:)

'); $a = $dom->find('a')[0]; $a->setAttribute('class', 'foo'); echo $a->getAttribute('class'); // "foo" @@ -162,10 +231,40 @@ echo $a->getAttribute('class'); // "foo" You may also get the `PHPHtmlParser\Dom\Tag` class directly and manipulate it as you see fit. ```php +use PHPHtmlParser\Dom; + $dom = new Dom; -$dom->load('

Hey bro, click here
:)

'); +$dom->loadStr('

Hey bro, click here
:)

'); +/** @var Dom\Node\AbstractNode $a */ $a = $dom->find('a')[0]; $tag = $a->getTag(); $tag->setAttribute('class', 'foo'); echo $a->getAttribute('class'); // "foo" ``` + +It is also possible to remove a node from the tree. Simply call the `delete` method on any node to remove it from the tree. It is important to note that you should unset the node after removing it from the `DOM`; it will still take memory as long as it is not unset. + +```php +use PHPHtmlParser\Dom; + +$dom = new Dom; +$dom->loadStr('

Hey bro, click here
:)

'); +/** @var Dom\Node\AbstractNode $a */ +$a = $dom->find('a')[0]; +$a->delete(); +unset($a); +echo $dom; // '

Hey bro,
:)

'); +``` + +You can modify the text of `TextNode` objects easily. Please note that, if you set an encoding, the new text will be encoded using the existing encoding. + +```php +use PHPHtmlParser\Dom; + +$dom = new Dom; +$dom->loadStr('

Hey bro, click here
:)

'); +/** @var Dom\Node\InnerNode $a */ +$a = $dom->find('a')[0]; +$a->firstChild()->setText('biz baz'); +echo $dom; // '

Hey bro, biz baz
:)

' +``` diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..3fc4dfcc --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,11 @@ +# Security Policy + +## Supported Versions + +We only support the most recent version with security fixes. + +## Reporting a Vulnerability + +If you have found any issues that might have security implications, please refer to https://tidelift.com/security + +Do not report security reports publicly. diff --git a/composer.json b/composer.json old mode 100644 new mode 100755 index 28a16cb2..166886f7 --- a/composer.json +++ b/composer.json @@ -13,19 +13,26 @@ } ], "require": { - "php": ">=5.4", - "paquettg/string-encode": "~0.1.0" + "php": ">=7.2", + "ext-mbstring": "*", + "ext-zlib": "*", + "ext-curl": "*", + "paquettg/string-encode": "~1.0.0", + "php-http/httplug": "^2.1", + "guzzlehttp/guzzle": "^7.0", + "guzzlehttp/psr7": "^1.6", + "myclabs/php-enum": "^1.7" }, "require-dev": { - "phpunit/phpunit": "~4.8.0", - "satooshi/php-coveralls": "~0.6.0", - "mockery/mockery": "~0.9.0" + "phpunit/phpunit": "^7.5.1", + "mockery/mockery": "^1.2", + "infection/infection": "^0.13.4", + "phan/phan": "^2.4", + "friendsofphp/php-cs-fixer": "^2.16" }, "autoload": { - "psr-0": { - "PHPHtmlParser": "src/" + "psr-4": { + "PHPHtmlParser\\": "src/PHPHtmlParser" } - }, - "minimum-stability": "dev", - "prefer-stable": true + } } diff --git a/infection.json.dist b/infection.json.dist new file mode 100755 index 00000000..0243ccf4 --- /dev/null +++ b/infection.json.dist @@ -0,0 +1,14 @@ +{ + "timeout": 10, + "source": { + "directories": [ + "src\/PHPHtmlParser" + ] + }, + "logs": { + "text": "infection.log" + }, + "mutators": { + "@default": true + } +} \ No newline at end of file diff --git a/phpunit.php b/phpunit.php old mode 100644 new mode 100755 diff --git a/phpunit.xml b/phpunit.xml old mode 100644 new mode 100755 index 64a402ac..04b1d77d --- a/phpunit.xml +++ b/phpunit.xml @@ -1,27 +1,29 @@ - - - ./tests/ - - + + + ./tests/ + + - - - src - - 
vendor - - - + + + src + + vendor + + + diff --git a/src/PHPHtmlParser/Content.php b/src/PHPHtmlParser/Content.php old mode 100644 new mode 100755 index a4c998a9..f1332175 --- a/src/PHPHtmlParser/Content.php +++ b/src/PHPHtmlParser/Content.php @@ -1,245 +1,257 @@ '; - protected $slash = " />\r\n\t"; - protected $attr = ' >'; - - public function __construct($content) - { - $this->content = $content; - $this->size = strlen($content); - $this->pos = 0; - } - - public function getPosition() - { - return $this->pos; - } - - /** - * Gets the current character we are at. - * - * @param int $char - * @return string - */ - public function char($char = null) - { - $pos = $this->pos; - if ( ! is_null($char)) - { - $pos = $char; - } - - if ( ! isset($this->content[$pos])) - { - return ''; - } - - return $this->content[$pos]; - } - - /** - * Moves the current position forward. - * - * @param int $count - * @return $this - */ - public function fastForward($count) - { - $this->pos += $count; - return $this; - } - - /** - * Moves the current position backward. - * - * @param int $count - * @return $this - */ - public function rewind($count) - { - $this->pos -= $count; - if ($this->pos < 0) - { - $this->pos = 0; - } - return $this; - } - - /** - * Copy the content until we find the given string. - * - * @param string $string - * @param bool $char - * @param bool $escape - * @return string - */ - public function copyUntil($string, $char = false, $escape = false) - { - if ($this->pos >= $this->size) - { - // nothing left - return ''; - } - - if ($escape) - { - $position = $this->pos; - $found = false; - while( ! 
$found) - { - $position = strpos($this->content, $string, $position); - if ($position === false) - { - // reached the end - $found = true; - continue; - } - - if ($this->char($position - 1) == '\\') - { - // this character is escaped - ++$position; - continue; - } - - $found = true; - } - } - elseif ($char) - { - $position = strcspn($this->content, $string, $this->pos); - $position += $this->pos; - } - else - { - $position = strpos($this->content, $string, $this->pos); - } - - if ($position === false) - { - // could not find character, just return the remaining of the content - $return = substr($this->content, $this->pos, $this->size - $this->pos); - $this->pos = $this->size; - return $return; - } - - if ($position == $this->pos) - { - // we are at the right place - return ''; - } - - $return = substr($this->content, $this->pos, $position - $this->pos); - // set the new position - $this->pos = $position; - return $return; - } - - /** - * Copies the content until the string is found and return it - * unless the 'unless' is found in the substring. - * - * @param string $string - * @param string $unless - * @return string - */ - public function copyUntilUnless($string, $unless) - { - $lastPos = $this->pos; - $this->fastForward(1); - $foundString = $this->copyUntil($string, true, true); - - $position = strcspn($foundString, $unless); - if ($position == strlen($foundString)) - { - return $string.$foundString; - } - // rewind changes and return nothing - $this->pos = $lastPos; - return ''; - } - - /** - * Copies the content until it reaches the token string., - * - * @param string $token - * @param bool $char - * @param bool $escape - * @return string - * @uses $this->copyUntil() - */ - public function copyByToken($token, $char = false, $escape = false) - { - $string = $this->$token; - return $this->copyUntil($string, $char, $escape); - } - - /** - * Skip a given set of characters. 
- * - * @param string $string - * @param bool $copy - * @return $this|string - */ - public function skip($string, $copy = false) - { - $len = strspn($this->content, $string, $this->pos); - - // make it chainable if they don't want a copy - $return = $this; - if ($copy) - { - $return = substr($this->content, $this->pos, $len); - } - - // update the position - $this->pos += $len; - - return $return; - } - - /** - * Skip a given token of pre-defined characters. - * - * @param string $token - * @param bool $copy - * @return null|string - * @uses $this->skip() - */ - public function skipByToken($token, $copy = false) - { - $string = $this->$token; - return $this->skip($string, $copy); - } +use PHPHtmlParser\Enum\StringToken; +use PHPHtmlParser\Exceptions\ContentLengthException; +use PHPHtmlParser\Exceptions\LogicalException; + +/** + * Class Content. + */ +class Content +{ + /** + * The content string. + * + * @var string + */ + protected $content; + + /** + * The size of the content. + * + * @var int + */ + protected $size; + + /** + * The current position we are in the content. + * + * @var int + */ + protected $pos; + + /** + * The following 4 strings are tags that are important to us. + * + * @var string + */ + protected $blank = " \t\r\n"; + protected $equal = ' =/>'; + protected $slash = " />\r\n\t"; + protected $attr = ' >'; + + /** + * Content constructor. + */ + public function __construct(string $content = '') + { + $this->content = $content; + $this->size = \strlen($content); + $this->pos = 0; + } + + /** + * Returns the current position of the content. + */ + public function getPosition(): int + { + return $this->pos; + } + + /** + * Gets the current character we are at. + * + * @param ?int $char + */ + public function char(?int $char = null): string + { + return $this->content[$char ?? $this->pos] ?? ''; + } + + /** + * Gets a string from the current character position. 
+ * + * @param int $length + * @return string + */ + public function string(int $length = 1): string + { + $string = ''; + $position = $this->pos; + do { + $string .= $this->char($position++); + } while ($position < $this->pos + $length); + return $string; + } + + /** + * Moves the current position forward. + * + * @throws ContentLengthException + */ + public function fastForward(int $count): Content + { + if (!$this->canFastForward($count)) { + // trying to go over the content length, throw exception + throw new ContentLengthException('Attempt to fastForward pass the length of the content.'); + } + $this->pos += $count; + + return $this; + } + + /** + * Checks if we can move the position forward. + */ + public function canFastForward(int $count): bool + { + return \strlen($this->content) >= $this->pos + $count; + } + + /** + * Moves the current position backward. + */ + public function rewind(int $count): Content + { + $this->pos -= $count; + if ($this->pos < 0) { + $this->pos = 0; + } + + return $this; + } + + /** + * Copy the content until we find the given string. 
+ */ + public function copyUntil(string $string, bool $char = false, bool $escape = false): string + { + if ($this->pos >= $this->size) { + // nothing left + return ''; + } + + if ($escape) { + $position = $this->pos; + $found = false; + while (!$found) { + $position = \strpos($this->content, $string, $position); + if ($position === false) { + // reached the end + break; + } + + if ($this->char($position - 1) == '\\') { + // this character is escaped + ++$position; + continue; + } + + $found = true; + } + } elseif ($char) { + $position = \strcspn($this->content, $string, $this->pos); + $position += $this->pos; + } else { + $position = \strpos($this->content, $string, $this->pos); + } + + if ($position === false) { + // could not find character, just return the remaining of the content + $return = \substr($this->content, $this->pos, $this->size - $this->pos); + if ($return === false) { + throw new LogicalException('Substr returned false with position ' . $this->pos . '.'); + } + $this->pos = $this->size; + + return $return; + } + + if ($position == $this->pos) { + // we are at the right place + return ''; + } + + $return = \substr($this->content, $this->pos, $position - $this->pos); + if ($return === false) { + throw new LogicalException('Substr returned false with position ' . $this->pos . '.'); + } + // set the new position + $this->pos = $position; + + return $return; + } + + /** + * Copies the content until the string is found and return it + * unless the 'unless' is found in the substring. + */ + public function copyUntilUnless(string $string, string $unless): string + { + $lastPos = $this->pos; + $this->fastForward(1); + $foundString = $this->copyUntil($string, true, true); + + $position = \strcspn($foundString, $unless); + if ($position == \strlen($foundString)) { + return $string . $foundString; + } + // rewind changes and return nothing + $this->pos = $lastPos; + + return ''; + } + + /** + * Copies the content until it reaches the token string.,. 
+ * + * @uses $this->copyUntil() + */ + public function copyByToken(StringToken $stringToken, bool $char = false, bool $escape = false): string + { + $string = $stringToken->getValue(); + + return $this->copyUntil($string, $char, $escape); + } + + /** + * Skip a given set of characters. + * + * @throws LogicalException + */ + public function skip(string $string, bool $copy = false): string + { + $len = \strspn($this->content, $string, $this->pos); + if ($len === false) { + throw new LogicalException('Strspn returned false with position ' . $this->pos . '.'); + } + $return = ''; + if ($copy) { + $return = \substr($this->content, $this->pos, $len); + if ($return === false) { + throw new LogicalException('Substr returned false with position ' . $this->pos . '.'); + } + } + + // update the position + $this->pos += $len; + + return $return; + } + + /** + * Skip a given token of pre-defined characters. + * + * @uses $this->skip() + */ + public function skipByToken(StringToken $skipToken, bool $copy = false): string + { + $string = $skipToken->getValue(); + + return $this->skip($string, $copy); + } } diff --git a/src/PHPHtmlParser/Contracts/Dom/CleanerInterface.php b/src/PHPHtmlParser/Contracts/Dom/CleanerInterface.php new file mode 100644 index 00000000..725a8a49 --- /dev/null +++ b/src/PHPHtmlParser/Contracts/Dom/CleanerInterface.php @@ -0,0 +1,16 @@ +parsedSelectorDTO[] = $parsedSelectorDTO; + } + } + } + + /** + * @param ParsedSelectorDTO[] $parsedSelectorDTOs + */ + public static function makeCollection(array $parsedSelectorDTOs): ParsedSelectorCollectionDTO + { + return new ParsedSelectorCollectionDTO($parsedSelectorDTOs); + } + + /** + * @return ParsedSelectorDTO[] + */ + public function getParsedSelectorDTO(): array + { + return $this->parsedSelectorDTO; + } +} diff --git a/src/PHPHtmlParser/DTO/Selector/ParsedSelectorDTO.php b/src/PHPHtmlParser/DTO/Selector/ParsedSelectorDTO.php new file mode 100644 index 00000000..bce0721f --- /dev/null +++ 
b/src/PHPHtmlParser/DTO/Selector/ParsedSelectorDTO.php @@ -0,0 +1,41 @@ +rules[] = $ruleDTO; + } + } + } + + /** + * @param RuleDTO[] $ruleDTOs + */ + public static function makeFromRules(array $ruleDTOs): ParsedSelectorDTO + { + return new ParsedSelectorDTO($ruleDTOs); + } + + /** + * @return RuleDTO[] + */ + public function getRules(): array + { + return $this->rules; + } +} diff --git a/src/PHPHtmlParser/DTO/Selector/RuleDTO.php b/src/PHPHtmlParser/DTO/Selector/RuleDTO.php new file mode 100644 index 00000000..5299e3a0 --- /dev/null +++ b/src/PHPHtmlParser/DTO/Selector/RuleDTO.php @@ -0,0 +1,100 @@ +tag = $values['tag']; + $this->operator = $values['operator']; + $this->key = $values['key']; + $this->value = $values['value']; + $this->noKey = $values['noKey']; + $this->alterNext = $values['alterNext']; + } + + /** + * @param string|array|null $key + * @param string|array|null $value + */ + public static function makeFromPrimitives(string $tag, string $operator, $key, $value, bool $noKey, bool $alterNext): RuleDTO + { + return new RuleDTO([ + 'tag' => $tag, + 'operator' => $operator, + 'key' => $key, + 'value' => $value, + 'noKey' => $noKey, + 'alterNext' => $alterNext, + ]); + } + + public function getTag(): string + { + return $this->tag; + } + + public function getOperator(): string + { + return $this->operator; + } + + /** + * @return string|array|null + */ + public function getKey() + { + return $this->key; + } + + /** + * @return string|array|null + */ + public function getValue() + { + return $this->value; + } + + public function isNoKey(): bool + { + return $this->noKey; + } + + public function isAlterNext(): bool + { + return $this->alterNext; + } +} diff --git a/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php b/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php new file mode 100755 index 00000000..3e7e1824 --- /dev/null +++ b/src/PHPHtmlParser/DTO/Tag/AttributeDTO.php @@ -0,0 +1,60 @@ +value = $values['value']; + $this->doubleQuote = $values['doubleQuote'] ?? 
true; + } + + public static function makeFromPrimitives(?string $value, bool $doubleQuote = true): AttributeDTO + { + return new AttributeDTO([ + 'value' => $value, + 'doubleQuote' => $doubleQuote, + ]); + } + + public function getValue(): ?string + { + return $this->value; + } + + public function isDoubleQuote(): bool + { + return $this->doubleQuote; + } + + public function htmlspecialcharsDecode(): void + { + if (!\is_null($this->value)) { + $this->value = \htmlspecialchars_decode($this->value); + } + } + + /** + * @throws Exception + */ + public function encodeValue(Encode $encode) + { + $this->value = $encode->convert($this->value); + } +} diff --git a/src/PHPHtmlParser/DTO/TagDTO.php b/src/PHPHtmlParser/DTO/TagDTO.php new file mode 100644 index 00000000..71f0ec1c --- /dev/null +++ b/src/PHPHtmlParser/DTO/TagDTO.php @@ -0,0 +1,74 @@ +status = $values['status'] ?? false; + $this->closing = $values['closing'] ?? false; + $this->node = $values['node'] ?? null; + $this->tag = $values['tag'] ?? null; + } + + public static function makeFromPrimitives(bool $status = false, bool $closing = false, ?HtmlNode $node = null, ?string $tag = null): TagDTO + { + return new TagDTO([ + 'status' => $status, + 'closing' => $closing, + 'node' => $node, + 'tag' => $tag, + ]); + } + + public function isStatus(): bool + { + return $this->status; + } + + public function isClosing(): bool + { + return $this->closing; + } + + /** + * @return mixed + */ + public function getNode(): ?HtmlNode + { + return $this->node; + } + + /** + * @return mixed + */ + public function getTag(): ?string + { + return $this->tag; + } +} diff --git a/src/PHPHtmlParser/Discovery/CleanerDiscovery.php b/src/PHPHtmlParser/Discovery/CleanerDiscovery.php new file mode 100644 index 00000000..96ef6787 --- /dev/null +++ b/src/PHPHtmlParser/Discovery/CleanerDiscovery.php @@ -0,0 +1,25 @@ +root->innerHtml(); - } - - /** - * A simple wrapper around the root node. 
- * - * @param string $name - * @return mixed - */ - public function __get($name) - { - return $this->root->$name; - } - - /** - * Attempts to load the dom from any resource, string, file, or URL. - * - * @param string $str - * @param array $options - * @return $this - */ - public function load($str, $options = []) - { - // check if it's a file - if (strpos($str, "\n") === FALSE && is_file($str)) - { - return $this->loadFromFile($str, $options); - } - // check if it's a url - if (preg_match("/^https?:\/\//i",$str)) - { - return $this->loadFromUrl($str, $options); - } - - return $this->loadStr($str, $options); - } - - /** - * Loads the dom from a document file/url - * - * @param string $file - * @param array $options - * @return $this - */ - public function loadFromFile($file, $options = []) - { - return $this->loadStr(file_get_contents($file), $options); - } - - /** - * Use a curl interface implementation to attempt to load - * the content from a url. - * - * @param string $url - * @param array $options - * @param CurlInterface $curl - * @return $this - */ - public function loadFromUrl($url, $options = [], CurlInterface $curl = null) - { - if (is_null($curl)) - { - // use the default curl interface - $curl = new Curl; - } - $content = $curl->get($url); - - return $this->loadStr($content, $options); - } - - /** - * Parsers the html of the given string. Used for load(), loadFromFile(), - * and loadFromUrl(). - * - * @param string $str - * @param array $option - * @return $this - */ - public function loadStr($str, $option) - { - $this->options = new Options; - $this->options->setOptions($this->globalOptions) - ->setOptions($option); - - $this->rawSize = strlen($str); - $this->raw = $str; - - $html = $this->clean($str); - - $this->size = strlen($str); - $this->content = new Content($html); - - $this->parse(); - $this->detectCharset(); - - return $this; - } - - /** - * Sets a global options array to be used by all load calls. 
- * - * @param array $options - * @return $this - */ - public function setOptions(array $options) - { - $this->globalOptions = $options; - return $this; - } - - /** - * Find elements by css selector on the root node. - * - * @param string $selector - * @param int $nth - * @return array - */ - public function find($selector, $nth = null) - { - $this->isLoaded(); - return $this->root->find($selector, $nth); - } - - /** - * Adds the tag (or tags in an array) to the list of tags that will always - * be self closing. - * - * @param string|array $tag - * @return $this - */ - public function addSelfClosingTag($tag) - { - if ( ! is_array($tag)) - { - $tag = [$tag]; - } - foreach ($tag as $value) - { - $this->selfClosing[] = $value; - } - return $this; - } - - /** - * Removes the tag (or tags in an array) from the list of tags that will - * always be self closing. - * - * @param string|array $tag - * @return $this - */ - public function removeSelfClosingTag($tag) - { - if ( ! is_array($tag)) - { - $tag = [$tag]; - } - $this->selfClosing = array_diff($this->selfClosing, $tag); - return $this; - } - - /** - * Sets the list of self closing tags to empty. - * - * @return $this - */ - public function clearSelfClosingTags() - { - $this->selfClosing = []; - return $this; - } - - /** - * Simple wrapper function that returns the first child. - * - * @return \PHPHtmlParser\Dom\AbstractNode - */ - public function firstChild() - { - $this->isLoaded(); - return $this->root->firstChild(); - } - - /** - * Simple wrapper function that returns the last child. - * - * @return \PHPHtmlParser\Dom\AbstractNode - */ - public function lastChild() - { - $this->isLoaded(); - return $this->root->lastChild(); - } - - /** - * Simple wrapper function that returns an element by the - * id. 
- * - * @param string $id - * @return \PHPHtmlParser\Dom\AbstractNode - */ - public function getElementById($id) - { - $this->isLoaded(); - return $this->find('#'.$id, 0); - } - - /** - * Simple wrapper function that returns all elements by - * tag name. +use PHPHtmlParser\Exceptions\UnknownChildTypeException; +use Psr\Http\Client\ClientExceptionInterface; +use Psr\Http\Client\ClientInterface; +use Psr\Http\Message\RequestInterface; + +/** + * Class Dom. + */ +class Dom implements DomInterface +{ + use RootAccessTrait; + + /** + * The charset we would like the output to be in. + * + * @var string + */ + private $defaultCharset = 'UTF-8'; + + /** + * The document string. + * + * @var Content + */ + private $content; + + /** + * A global options array to be used by all load calls. * - * @param string $name - * @return array - */ - public function getElementsByTag($name) - { - $this->isLoaded(); - return $this->find($name); - } - - /** - * Simple wrapper function that returns all elements by - * class name. - * - * @param string $class - * @return array - */ - public function getElementsByClass($class) - { - $this->isLoaded(); - return $this->find('.'.$class); - } - - /** - * Checks if the load methods have been called. - * - * @throws NotLoadedException - */ - protected function isLoaded() - { - if (is_null($this->content)) - { - throw new NotLoadedException('Content is not loaded!'); - } - } - - /** - * Cleans the html of any none-html information. - * - * @param string $str - * @return string - */ - protected function clean($str) - { - // clean out the \n\r - $str = str_replace(["\r\n", "\r", "\n"], ' ', $str); - - // strip the doctype - $str = mb_eregi_replace("", '', $str); - - // strip out comments - $str = mb_eregi_replace("", '', $str); - - // strip out cdata - $str = mb_eregi_replace("", '', $str); - - // strip out -

....

'); - $this->assertEquals('....', $dom->getElementsByTag('p')[1]->innerHtml); - } - - public function testMultipleDoubleQuotes() - { - $dom = new Dom; - $dom->load('Hello'); - $this->assertEquals('This is a "test" of double quotes', $dom->getElementsByTag('a')[0]->title); - } - - public function testMultipleSingleQuotes() - { - $dom = new Dom; - $dom->load("Hello"); - $this->assertEquals("Ain't this the best", $dom->getElementsByTag('a')[0]->title); - } +use PHPHtmlParser\Dom; +use PHPHtmlParser\Options; +use PHPUnit\Framework\TestCase; + +class DomTest extends TestCase +{ + public function tearDown() + { + Mockery::close(); + } + + /** + * /* */"; + $dom = new Dom(); + $dom->setOptions((new Options())->setCleanupInput(false)); + $dom->loadStr($html); + $this->assertSame($html, $dom->root->outerHtml()); + } + + public function testLoadSelfclosingAttr() + { + $dom = new Dom(); + $dom->loadStr("

baz
"); + $br = $dom->find('br', 0); + $this->assertEquals('
', $br->outerHtml); + } + + public function testLoadSelfclosingAttrToString() + { + $dom = new Dom(); + $dom->loadStr("

baz
"); + $br = $dom->find('br', 0); + $this->assertEquals('
', (string) $br); + } + + public function testLoadNoOpeningTag() + { + $dom = new Dom(); + $dom->loadStr('
PR Manager
content
'); + $this->assertEquals('content', $dom->find('.content', 0)->text); + } + + public function testLoadNoValueAttribute() + { + $dom = new Dom(); + $dom->loadStr('
Main content here
'); + $this->assertEquals('
Main content here
', $dom->innerHtml); + } + + public function testLoadBackslashAttributeValue() + { + $dom = new Dom(); + $dom->loadStr('
Main content here
'); + $this->assertEquals('
Main content here
', $dom->innerHtml); + } + + public function testLoadNoValueAttributeBefore() + { + $dom = new Dom(); + $dom->loadStr('
Main content here
'); + $this->assertEquals('
Main content here
', $dom->innerHtml); + } + + public function testLoadUpperCase() + { + $dom = new Dom(); + $dom->loadStr('

hEY BRO, CLICK HERE

'); + $this->assertEquals('

hEY BRO, CLICK HERE

', $dom->find('div', 0)->innerHtml); + } + + public function testLoadWithFile() + { + $dom = new Dom(); + $dom->loadFromFile('tests/data/files/small.html'); + $this->assertEquals('VonBurgermeister', $dom->find('.post-user font', 0)->text); + } + + public function testLoadFromFile() + { + $dom = new Dom(); + $dom->loadFromFile('tests/data/files/small.html'); + $this->assertEquals('VonBurgermeister', $dom->find('.post-user font', 0)->text); + } + + public function testLoadFromFileFind() + { + $dom = new Dom(); + $dom->loadFromFile('tests/data/files/small.html'); + $this->assertEquals('VonBurgermeister', $dom->find('.post-row div .post-user font', 0)->text); + } + + public function testLoadFromFileNotFound() + { + $dom = new Dom(); + $this->expectException(\PHPHtmlParser\Exceptions\LogicalException::class); + $dom->loadFromFile('tests/data/files/unkowne.html'); + } + + public function testLoadUtf8() + { + $dom = new Dom(); + $dom->loadStr('

Dzień

'); + $this->assertEquals('Dzień', $dom->find('p', 0)->text); + } + + public function testLoadFileWhitespace() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setCleanupInput(false)); + $dom->loadFromFile('tests/data/files/whitespace.html'); + $this->assertEquals(1, \count($dom->find('.class'))); + $this->assertEquals('', (string) $dom); + } + + public function testLoadFileBig() + { + $dom = new Dom(); + $dom->loadFromFile('tests/data/files/big.html'); + $this->assertEquals(20, \count($dom->find('.content-border'))); + } + + public function testLoadFileBigTwice() + { + $dom = new Dom(); + $dom->loadFromFile('tests/data/files/big.html'); + $post = $dom->find('.post-row', 0); + $this->assertEquals('

Журчанье воды
Черно-белые тени
Вновь на фонтане

', $post->find('.post-message', 0)->innerHtml); + } + + public function testLoadFileBigTwicePreserveOption() + { + $dom = new Dom(); + $dom->loadFromFile('tests/data/files/big.html', + (new Options())->setPreserveLineBreaks(true)); + $post = $dom->find('.post-row', 0); + $this->assertEquals( + "

Журчанье воды
\nЧерно-белые тени
\nВновь на фонтане

", + \trim($post->find('.post-message', 0)->innerHtml) + ); + } + + public function testLoadFromUrl() + { + $streamMock = Mockery::mock(\Psr\Http\Message\StreamInterface::class); + $streamMock->shouldReceive('getContents') + ->once() + ->andReturn(\file_get_contents('tests/data/files/small.html')); + $responseMock = Mockery::mock(\Psr\Http\Message\ResponseInterface::class); + $responseMock->shouldReceive('getBody') + ->once() + ->andReturn($streamMock); + $clientMock = Mockery::mock(\Psr\Http\Client\ClientInterface::class); + $clientMock->shouldReceive('sendRequest') + ->once() + ->andReturn($responseMock); + + $dom = new Dom(); + $dom->loadFromUrl('http://google.com', null, $clientMock); + $this->assertEquals('VonBurgermeister', $dom->find('.post-row div .post-user font', 0)->text); + } + + public function testScriptCleanerScriptTag() + { + $dom = new Dom(); + $dom->loadStr(' +

.....

+ +

....

'); + $this->assertEquals('....', $dom->getElementsByTag('p')[1]->innerHtml); + } + + public function testClosingSpan() + { + $dom = new Dom(); + $dom->loadStr("
sometext
"); + $this->assertEquals('sometext', $dom->getElementsByTag('div')[0]->innerHtml); + } + + public function testMultipleDoubleQuotes() + { + $dom = new Dom(); + $dom->loadStr('Hello'); + $this->assertEquals('This is a "test" of double quotes', $dom->getElementsByTag('a')[0]->title); + } + + public function testMultipleSingleQuotes() + { + $dom = new Dom(); + $dom->loadStr("Hello"); + $this->assertEquals("Ain't this the best", $dom->getElementsByTag('a')[0]->title); + } + + public function testBeforeClosingTag() + { + $dom = new Dom(); + $dom->loadStr('
'); + $this->assertEquals('
', (string) $dom); + } + + public function testCodeTag() + { + $dom = new Dom(); + $dom->loadStr('hello$foo = "bar";'); + $this->assertEquals('hello$foo = "bar";', (string) $dom); + } + + public function testCountChildren() + { + $dom = new Dom(); + $dom->loadStr('hello$foo = "bar";'); + $this->assertEquals(2, $dom->countChildren()); + } + + public function testGetChildrenArray() + { + $dom = new Dom(); + $dom->loadStr('hello$foo = "bar";'); + $this->assertInternalType('array', $dom->getChildren()); + } + + public function testHasChildren() + { + $dom = new Dom(); + $dom->loadStr('hello$foo = "bar";'); + $this->assertTrue($dom->hasChildren()); + } + + public function testWhitespaceInText() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setRemoveDoubleSpace(false)); + $dom->loadStr('
    Hello world
'); + $this->assertEquals('
    Hello world
', (string) $dom); + } + + public function testGetComplexAttribute() + { + $dom = new Dom(); + $dom->loadStr('Next >'); + $href = $dom->find('a', 0)->href; + $this->assertEquals('?search=Fort+William&session_type=face&distance=100&uqs=119846&page=4', $href); + } + + public function testGetComplexAttributeHtmlSpecialCharsDecode() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setHtmlSpecialCharsDecode(true)); + $dom->loadStr('Next >'); + $a = $dom->find('a', 0); + $this->assertEquals('Next >', $a->innerHtml); + $href = $a->href; + $this->assertEquals('?search=Fort+William&session_type=face&distance=100&uqs=119846&page=4', $href); + } + + public function testGetChildrenNoChildren() + { + $dom = new Dom(); + $dom->loadStr('
Test
'); + + $imgNode = $dom->root->find('img'); + $children = $imgNode->getChildren(); + $this->assertTrue(\count($children) === 0); + } + + public function testInfiniteLoopNotHappening() + { + $dom = new Dom(); + $dom->loadStr(' + + + + + + + <'); + + $metaNodes = $dom->root->find('meta'); + $this->assertEquals(4, \count($metaNodes)); + } + + public function testFindOrder() + { + $str = '

'; + $dom = new Dom(); + $dom->loadStr($str); + $images = $dom->find('img'); + + $this->assertEquals('', (string) $images[0]); + } + + public function testCaseInSensitivity() + { + $str = "blah"; + $dom = new Dom(); + $dom->loadStr($str); + + $FooBar = $dom->find('FooBar'); + $this->assertEquals('asdf', $FooBar->getAttribute('attribute')); + } + + public function testCaseSensitivity() + { + $str = "blah"; + $dom = new Dom(); + $dom->loadStr($str); + + $FooBar = $dom->find('FooBar'); + $this->assertEquals('asdf', $FooBar->Attribute); + } + + public function testEmptyAttribute() + { + $str = '
  • blah
  • what
'; + $dom = new Dom(); + $dom->loadStr($str); + + $items = $dom->find('.summary .foo'); + $this->assertEquals(1, \count($items)); + } + + public function testInnerText() + { + $html = <<123456789101112 +EOF; + $dom = new Dom(); + $dom->loadStr($html); + $this->assertEquals($dom->innerText, '123456789101112'); + } + + public function testMultipleSquareSelector() + { + $dom = new Dom(); + $dom->loadStr(''); + + $items = $dom->find('input[type=text][name=foo][baz=fig]'); + $this->assertEquals(1, \count($items)); + } + + public function testNotSquareSelector() + { + $dom = new Dom(); + $dom->loadStr(''); + + $items = $dom->find('input[type!=foo]'); + $this->assertEquals(1, \count($items)); + } + + public function testStartSquareSelector() + { + $dom = new Dom(); + $dom->loadStr(''); + + $items = $dom->find('input[name^=f]'); + $this->assertEquals(1, \count($items)); + } + + public function testEndSquareSelector() + { + $dom = new Dom(); + $dom->loadStr(''); + + $items = $dom->find('input[baz$=g]'); + $this->assertEquals(1, \count($items)); + } + + public function testStarSquareSelector() + { + $dom = new Dom(); + $dom->loadStr(''); + + $items = $dom->find('input[baz*=*]'); + $this->assertEquals(1, \count($items)); + } + + public function testStarFullRegexSquareSelector() + { + $dom = new Dom(); + $dom->loadStr(''); + + $items = $dom->find('input[baz*=/\w+/]'); + $this->assertEquals(1, \count($items)); + } + + public function testFailedSquareSelector() + { + $dom = new Dom(); + $dom->loadStr(''); + + $items = $dom->find('input[baz%=g]'); + $this->assertEquals(1, \count($items)); + } + + public function testLoadGetAttributeWithBackslash() + { + $dom = new Dom(); + $dom->loadStr('
\
demo
'); + $imgs = $dom->find('img', 0); + $this->assertEquals('/img/test.png', $imgs->getAttribute('src')); + } + + public function test25ChildrenFound() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setWhitespaceTextNode(false)); + $dom->loadFromFile('tests/data/files/51children.html'); + $children = $dom->find('#red-line-g *'); + $this->assertEquals(25, \count($children)); + } + + public function testHtml5PageloadStr() + { + $dom = new Dom(); + $dom->loadFromFile('tests/data/files/html5.html'); + + /** @var Node\AbstractNode $meta */ + $div = $dom->find('div.d-inline-block', 0); + $this->assertEquals('max-width: 29px', $div->getAttribute('style')); + } + + public function testFindAttributeInBothParentAndChild() + { + $dom = new Dom(); + $dom->loadStr(' + +'); + + /** @var Node\AbstractNode $meta */ + $nodes = $dom->find('[attribute]'); + $this->assertCount(2, $nodes); + } + + public function testLessThanCharacterInJavascript() + { + $results = (new Dom())->loadStr('
', + (new Options())->setCleanupInput(false) + ->setRemoveScripts(false) + )->find('body'); + $this->assertCount(1, $results); + } + + public function testUniqueIdForAllObjects() + { + // Create a dom which will be used as a parent/container for a paragraph + $dom1 = new \PHPHtmlParser\Dom(); + $dom1->loadStr('
A container div
'); // Resets the counter (doesn't matter here as the counter was 0 even without resetting) + $div = $dom1->firstChild(); + + // Create a paragraph outside of the first dom + $dom2 = new \PHPHtmlParser\Dom(); + $dom2->loadStr('

Our new paragraph.

'); // Resets the counter + $paragraph = $dom2->firstChild(); + + $div->addChild($paragraph); + + $this->assertEquals('A container div

Our new paragraph.

', $div->innerhtml); + } + + public function testFindDescendantsOfMatch() + { + $dom = new Dom(); + $dom->loadStr('

+ + test + testing + This is a test + italic + password123 + + another +

'); + + $nodes = $dom->find('b'); + $this->assertCount(5, $nodes); + } + + public function testCompatibleWithWordPressShortcode() + { + $dom = new Dom(); + $dom->loadStr('

+[wprs_alert type="success" content="this is a short code" /] +

'); + + $node = $dom->find('p', 0); + $this->assertEquals(' [wprs_alert type="success" content="this is a short code" /] ', $node->innerHtml); + } + + public function testBrokenHtml() + { + $dom = new Dom(); + $dom->loadStr('assertEquals('', $dom->outerHtml); + } + + public function testXMLOpeningToken() + { + $dom = new Dom(); + $dom->loadStr('

fun time

'); + + $this->assertEquals('

fun time

', $dom->outerHtml); + } + + /** + * Test to cover issue found in ticket #221. + */ + public function testRandomTagInMiddleOfText() + { + $dom = new Dom(); + $dom->loadStr('

Hello, this is just a test in which <55 names with some other text > should be interpreted as text

'); + + $this->assertEquals('

Hello, this is just a test in which <55 names with some other text> should be interpreted as text

', $dom->outerHtml); + } + + public function testHttpCall() + { + $dom = new Dom(); + $dom->loadFromUrl('http://google.com'); + $this->assertNotEmpty($dom->outerHtml); + } } diff --git a/tests/Node/ChildrenTest.php b/tests/Node/ChildrenTest.php old mode 100644 new mode 100755 index 65fe0f35..37a160ff --- a/tests/Node/ChildrenTest.php +++ b/tests/Node/ChildrenTest.php @@ -1,78 +1,122 @@ setParent($parent); - $this->assertEquals($parent->id(), $child->getParent()->id()); - } +class NodeChildTest extends TestCase +{ + public function testGetParent() + { + $parent = new Node(); + $child = new Node(); + $child->setParent($parent); + $this->assertEquals($parent->id(), $child->getParent()->id()); + } - public function testSetParentTwice() - { - $parent = new Node; - $parent2 = new Node; - $child = new Node; - $child->setParent($parent); - $child->setParent($parent2); - $this->assertEquals($parent2->id(), $child->getParent()->id()); - } + public function testSetParentTwice() + { + $parent = new Node(); + $parent2 = new Node(); + $child = new Node(); + $child->setParent($parent); + $child->setParent($parent2); + $this->assertEquals($parent2->id(), $child->getParent()->id()); + } - public function testNextSibling() - { - $parent = new Node; - $child = new Node; - $child2 = new Node; - $child->setParent($parent); - $child2->setParent($parent); - $this->assertEquals($child2->id(), $child->nextSibling()->id()); - } + public function testNextSibling() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child->setParent($parent); + $child2->setParent($parent); + $this->assertEquals($child2->id(), $child->nextSibling()->id()); + } - public function testPreviousSibling() - { - $parent = new Node; - $child = new Node; - $child2 = new Node; - $child->setParent($parent); - $child2->setParent($parent); - $this->assertEquals($child->id(), $child2->previousSibling()->id()); - } + /** + * @expectedException \PHPHtmlParser\Exceptions\ChildNotFoundException + */ + 
public function testNextSiblingNotFound() + { + $parent = new Node(); + $child = new Node(); + $child->setParent($parent); + $child->nextSibling(); + } - public function testGetChildren() - { - $parent = new Node; - $child = new Node; - $child2 = new Node; - $child->setParent($parent); - $child2->setParent($parent); - $this->assertEquals($child->id(), $parent->getChildren()[0]->id()); - } + /** + * @expectedException \PHPHtmlParser\Exceptions\ParentNotFoundException + */ + public function testNextSiblingNoParent() + { + $child = new Node(); + $child->nextSibling(); + } - public function testCountChildren() - { - $parent = new Node; - $child = new Node; - $child2 = new Node; - $child->setParent($parent); - $child2->setParent($parent); - $this->assertEquals(2, $parent->countChildren()); - } + public function testPreviousSibling() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child->setParent($parent); + $child2->setParent($parent); + $this->assertEquals($child->id(), $child2->previousSibling()->id()); + } - public function testIsChild () - { - $parent = new Node; - $child1 = new Node; - $child2 = new Node; + /** + * @expectedException \PHPHtmlParser\Exceptions\ChildNotFoundException + */ + public function testPreviousSiblingNotFound() + { + $parent = new Node(); + $node = new Node(); + $node->setParent($parent); + $node->previousSibling(); + } - $child1->setParent($parent); - $child2->setParent($child1); + /** + * @expectedException \PHPHtmlParser\Exceptions\ParentNotFoundException + */ + public function testPreviousSiblingNoParent() + { + $child = new Node(); + $child->previousSibling(); + } - $this->assertTrue ($parent->isChild ($child1->id ())); - $this->assertTrue ($parent->isDescendant ($child2->id ())); - $this->assertFalse ($parent->isChild ($child2->id ())); - } + public function testGetChildren() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child->setParent($parent); + 
$child2->setParent($parent); + $this->assertEquals($child->id(), $parent->getChildren()[0]->id()); + } + + public function testCountChildren() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child->setParent($parent); + $child2->setParent($parent); + $this->assertEquals(2, $parent->countChildren()); + } + + public function testIsChild() + { + $parent = new Node(); + $child1 = new Node(); + $child2 = new Node(); + + $child1->setParent($parent); + $child2->setParent($child1); + + $this->assertTrue($parent->isChild($child1->id())); + $this->assertTrue($parent->isDescendant($child2->id())); + $this->assertFalse($parent->isChild($child2->id())); + } } diff --git a/tests/Node/HtmlTest.php b/tests/Node/HtmlTest.php old mode 100644 new mode 100755 index 8bd58019..592003b4 --- a/tests/Node/HtmlTest.php +++ b/tests/Node/HtmlTest.php @@ -1,434 +1,535 @@ setAttributes([ - 'class' => [ - 'value' => 'all', - 'doubleQuote' => true, - ], - ]); - $a = new Tag('a'); - $a->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - ]); - $br = new Tag('br'); - $br->selfClosing(); - - $parent = new HtmlNode($div); - $childa = new HtmlNode($a); - $childbr = new HtmlNode($br); - $parent->addChild($childa); - $parent->addChild($childbr); - $childa->addChild(new TextNode('link')); - - $this->assertEquals("link
", $parent->innerHtml()); - } - - public function testInnerHtmlTwice() - { - $div = new Tag('div'); - $div->setAttributes([ - 'class' => [ - 'value' => 'all', - 'doubleQuote' => true, - ], - ]); - $a = new Tag('a'); - $a->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - ]); - $br = new Tag('br'); - $br->selfClosing(); - - $parent = new HtmlNode($div); - $childa = new HtmlNode($a); - $childbr = new HtmlNode($br); - $parent->addChild($childa); - $parent->addChild($childbr); - $childa->addChild(new TextNode('link')); - - $inner = $parent->innerHtml(); - $this->assertEquals($inner, $parent->innerHtml()); - } - - /** - * @expectedException PHPHtmlParser\Exceptions\UnknownChildTypeException - */ - public function testInnerHtmlUnkownChild() - { - $div = new Tag('div'); - $div->setAttributes([ - 'class' => [ - 'value' => 'all', - 'doubleQuote' => true, - ], - ]); - $a = new Tag('a'); - $a->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - ]); - $br = new Tag('br'); - $br->selfClosing(); - - $parent = new HtmlNode($div); - $childa = new HtmlNode($a); - $childbr = new MockNode($br); - $parent->addChild($childa); - $parent->addChild($childbr); - $childa->addChild(new TextNode('link')); - - $inner = $parent->innerHtml(); - $this->assertEquals($inner, $parent->innerHtml()); - } - - public function testInnerHtmlMagic() - { - $parent = new HtmlNode('div'); - $parent->getTag()->setAttributes([ - 'class' => [ - 'value' => 'all', - 'doubleQuote' => true, - ], - ]); - $childa = new HtmlNode('a'); - $childa->getTag()->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - ]); - $childbr = new HtmlNode('br'); - $childbr->getTag()->selfClosing(); - - $parent->addChild($childa); - $parent->addChild($childbr); - $childa->addChild(new TextNode('link')); - - $this->assertEquals("link
", $parent->innerHtml); - } - - public function testOuterHtml() - { - $div = new Tag('div'); - $div->setAttributes([ - 'class' => [ - 'value' => 'all', - 'doubleQuote' => true, - ], - ]); - $a = new Tag('a'); - $a->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - ]); - $br = new Tag('br'); - $br->selfClosing(); - - $parent = new HtmlNode($div); - $childa = new HtmlNode($a); - $childbr = new HtmlNode($br); - $parent->addChild($childa); - $parent->addChild($childbr); - $childa->addChild(new TextNode('link')); - - $this->assertEquals('', $parent->outerHtml()); - } - - public function testOuterHtmlTwice() - { - $div = new Tag('div'); - $div->setAttributes([ - 'class' => [ - 'value' => 'all', - 'doubleQuote' => true, - ], - ]); - $a = new Tag('a'); - $a->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - ]); - $br = new Tag('br'); - $br->selfClosing(); - - $parent = new HtmlNode($div); - $childa = new HtmlNode($a); - $childbr = new HtmlNode($br); - $parent->addChild($childa); - $parent->addChild($childbr); - $childa->addChild(new TextNode('link')); - - $outer = $parent->outerHtml(); - $this->assertEquals($outer, $parent->outerHtml()); - } - - public function testOuterHtmlEmpty() - { - $a = new Tag('a'); - $a->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - ]); - $node = new HtmlNode($a); - - $this->assertEquals("", $node->OuterHtml()); - } - - public function testOuterHtmlMagic() - { - $parent = new HtmlNode('div'); - $parent->getTag()->setAttributes([ - 'class' => [ - 'value' => 'all', - 'doubleQuote' => true, - ], - ]); - $childa = new HtmlNode('a'); - $childa->getTag()->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - ]); - $childbr = new HtmlNode('br'); - $childbr->getTag()->selfClosing(); - - $parent->addChild($childa); - $parent->addChild($childbr); - $childa->addChild(new 
TextNode('link')); - - $this->assertEquals('', $parent->outerHtml); - } - - public function testOuterHtmlNoValueAttribute() - { - $parent = new HtmlNode('div'); - $parent->setAttribute('class', [ - 'value' => 'all', - 'doubleQuote' => true, - ]); - $childa = new HtmlNode('a'); - $childa->setAttribute('href', [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ]); - $childa->setAttribute('ui-view', null); - $childbr = new HtmlNode('br'); - $childbr->getTag()->selfClosing(); - - $parent->addChild($childa); - $parent->addChild($childbr); - $childa->addChild(new TextNode('link')); - - $this->assertEquals('', $parent->outerHtml); - } - - public function testText() - { - $a = new Tag('a'); - $node = new HtmlNode($a); - $node->addChild(new TextNode('link')); - - $this->assertEquals('link', $node->text()); - } - - public function testTextTwice() - { - $a = new Tag('a'); - $node = new HtmlNode($a); - $node->addChild(new TextNode('link')); - - $text = $node->text(); - $this->assertEquals($text, $node->text()); - } - - public function testTextNone() - { - $a = new Tag('a'); - $node = new HtmlNode($a); - - $this->assertEmpty($node->text()); - } - - public function testTextMagic() - { - $node = new HtmlNode('a'); - $node->addChild(new TextNode('link')); - - $this->assertEquals('link', $node->text); - } - - public function testTextLookInChildren() - { - $p = new HtmlNode('p'); - $a = new HtmlNode('a'); - $a->addChild(new TextNode('click me')); - $p->addChild(new TextNode('Please ')); - $p->addChild($a); - $p->addChild(new TextNode('!')); - $node = new HtmlNode('div'); - $node->addChild($p); - - $this->assertEquals('Please click me!', $node->text(true)); - } - - public function testTextLookInChildrenAndNoChildren() - { - $p = new HtmlNode('p'); - $a = new HtmlNode('a'); - $a->addChild(new TextNode('click me')); - $p->addChild(new TextNode('Please ')); - $p->addChild($a); - $p->addChild(new TextNode('!')); - - $p->text; - $p->text(true); - - $this->assertEquals('Please 
click me!', $p->text(true)); - } - - public function testGetAttribute() - { - $node = new HtmlNode('a'); - $node->getTag()->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - 'class' => [ - 'value' => 'outerlink rounded', - 'doubleQuote' => true, - ], - ]); - - $this->assertEquals('outerlink rounded', $node->getAttribute('class')); - } - - public function testGetAttributeMagic() - { - $node = new HtmlNode('a'); - $node->getTag()->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - 'class' => [ - 'value' => 'outerlink rounded', - 'doubleQuote' => true, - ], - ]); - - $this->assertEquals('http://google.com', $node->href); - } - - public function testGetAttributes() - { - $node = new HtmlNode('a'); - $node->getTag()->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - 'class' => [ - 'value' => 'outerlink rounded', - 'doubleQuote' => true, - ], - ]); - - $this->assertEquals('outerlink rounded', $node->getAttributes()['class']); - } - - public function testSetAttribute() - { - $node = new HtmlNode('a'); - $node->setAttribute('class', 'foo'); - $this->assertEquals('foo', $node->getAttribute('class')); - } - - public function testCountable() - { - $div = new Tag('div'); - $div->setAttributes([ - 'class' => [ - 'value' => 'all', - 'doubleQuote' => true, - ], - ]); - $a = new Tag('a'); - $a->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - ]); - $br = new Tag('br'); - $br->selfClosing(); - - $parent = new HtmlNode($div); - $childa = new HtmlNode($a); - $childbr = new HtmlNode($br); - $parent->addChild($childa); - $parent->addChild($childbr); - $childa->addChild(new TextNode('link')); - - $this->assertEquals(count($parent->getChildren()), count($parent)); - } - - public function testIterator() - { - $div = new Tag('div'); - $div->setAttributes([ - 'class' => [ - 'value' => 'all', - 
'doubleQuote' => true, - ], - ]); - $a = new Tag('a'); - $a->setAttributes([ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => false, - ], - ]); - $br = new Tag('br'); - $br->selfClosing(); - - $parent = new HtmlNode($div); - $childa = new HtmlNode($a); - $childbr = new HtmlNode($br); - $parent->addChild($childa); - $parent->addChild($childbr); - $childa->addChild(new TextNode('link')); - - $children = 0; - foreach ($parent as $child) { - ++$children; - } - $this->assertEquals(2, $children); - } +use PHPUnit\Framework\TestCase; + +class NodeHtmlTest extends TestCase +{ + public function testInnerHtml() + { + $div = new Tag('div'); + $div->setAttributes([ + 'class' => [ + 'value' => 'all', + 'doubleQuote' => true, + ], + ]); + $a = new Tag('a'); + $a->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]); + $br = new Tag('br'); + $br->selfClosing(); + + $parent = new HtmlNode($div); + $childa = new HtmlNode($a); + $childbr = new HtmlNode($br); + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new TextNode('link')); + + $this->assertEquals("link
", $parent->innerHtml()); + } + + public function testInnerHtmlTwice() + { + $div = new Tag('div'); + $div->setAttributes([ + 'class' => [ + 'value' => 'all', + 'doubleQuote' => true, + ], + ]); + $a = new Tag('a'); + $br = new Tag('br'); + $br->selfClosing(); + + $parent = new HtmlNode($div); + $childa = new HtmlNode($a); + $childa->setAttribute('href', 'http://google.com', false); + $childbr = new HtmlNode($br); + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new TextNode('link')); + + $inner = $parent->innerHtml(); + $this->assertEquals($inner, $parent->innerHtml()); + } + + /** + * @expectedException \PHPHtmlParser\Exceptions\UnknownChildTypeException + */ + public function testInnerHtmlUnkownChild() + { + $div = new Tag('div'); + $div->setAttributes([ + 'class' => [ + 'value' => 'all', + 'doubleQuote' => true, + ], + ]); + $a = new Tag('a'); + $a->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]); + $br = new Tag('br'); + $br->selfClosing(); + + $parent = new HtmlNode($div); + $childa = new HtmlNode($a); + $childbr = new MockNode($br); + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new TextNode('link')); + + $inner = $parent->innerHtml(); + $this->assertEquals($inner, $parent->innerHtml()); + } + + public function testInnerHtmlMagic() + { + $parent = new HtmlNode('div'); + $parent->tag->setAttributes([ + 'class' => [ + 'value' => 'all', + 'doubleQuote' => true, + ], + ]); + $childa = new HtmlNode('a'); + $childa->getTag()->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]); + $childbr = new HtmlNode('br'); + $childbr->getTag()->selfClosing(); + + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new TextNode('link')); + + $this->assertEquals("link
", $parent->innerHtml); + } + + public function testOuterHtml() + { + $div = new Tag('div'); + $div->setAttributes([ + 'class' => [ + 'value' => 'all', + 'doubleQuote' => true, + ], + ]); + $a = new Tag('a'); + $a->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]); + $br = new Tag('br'); + $br->selfClosing(); + + $parent = new HtmlNode($div); + $childa = new HtmlNode($a); + $childbr = new HtmlNode($br); + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new TextNode('link')); + + $this->assertEquals('', $parent->outerHtml()); + } + + public function testOuterHtmlTwice() + { + $div = new Tag('div'); + $div->setAttributes([ + 'class' => [ + 'value' => 'all', + 'doubleQuote' => true, + ], + ]); + $a = new Tag('a'); + $a->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]); + $br = new Tag('br'); + $br->selfClosing(); + + $parent = new HtmlNode($div); + $childa = new HtmlNode($a); + $childbr = new HtmlNode($br); + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new TextNode('link')); + + $outer = $parent->outerHtml(); + $this->assertEquals($outer, $parent->outerHtml()); + } + + public function testOuterHtmlEmpty() + { + $a = new Tag('a'); + $a->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]); + $node = new HtmlNode($a); + + $this->assertEquals("", $node->OuterHtml()); + } + + public function testOuterHtmlMagic() + { + $parent = new HtmlNode('div'); + $parent->getTag()->setAttributes([ + 'class' => [ + 'value' => 'all', + 'doubleQuote' => true, + ], + ]); + $childa = new HtmlNode('a'); + $childa->getTag()->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]); + $childbr = new HtmlNode('br'); + $childbr->getTag()->selfClosing(); + + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new 
TextNode('link')); + + $this->assertEquals('', $parent->outerHtml); + } + + public function testOuterHtmlNoValueAttribute() + { + $parent = new HtmlNode('div'); + $parent->setAttribute('class', 'all'); + $childa = new HtmlNode('a'); + $childa->setAttribute('href', 'http://google.com', false); + $childa->setAttribute('ui-view', null); + $childbr = new HtmlNode('br'); + $childbr->getTag()->selfClosing(); + + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new TextNode('link')); + + $this->assertEquals('', $parent->outerHtml); + } + + public function testOuterHtmlWithChanges() + { + $div = new Tag('div'); + $div->setAttributes([ + 'class' => [ + 'value' => 'all', + 'doubleQuote' => true, + ], + ]); + $a = new Tag('a'); + $a->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]); + $br = new Tag('br'); + $br->selfClosing(); + + $parent = new HtmlNode($div); + $childa = new HtmlNode($a); + $childbr = new HtmlNode($br); + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new TextNode('link')); + + $this->assertEquals('', $parent->outerHtml()); + + $childa->setAttribute('href', 'https://www.google.com'); + + $this->assertEquals('link', $childa->outerHtml()); + } + + public function testText() + { + $a = new Tag('a'); + $node = new HtmlNode($a); + $node->addChild(new TextNode('link')); + + $this->assertEquals('link', $node->text()); + } + + public function testTextTwice() + { + $a = new Tag('a'); + $node = new HtmlNode($a); + $node->addChild(new TextNode('link')); + + $text = $node->text(); + $this->assertEquals($text, $node->text()); + } + + public function testTextNone() + { + $a = new Tag('a'); + $node = new HtmlNode($a); + + $this->assertEmpty($node->text()); + } + + public function testTextMagic() + { + $node = new HtmlNode('a'); + $node->addChild(new TextNode('link')); + + $this->assertEquals('link', $node->text); + } + + public function testTextLookInChildren() 
+ { + $p = new HtmlNode('p'); + $a = new HtmlNode('a'); + $a->addChild(new TextNode('click me')); + $p->addChild(new TextNode('Please ')); + $p->addChild($a); + $p->addChild(new TextNode('!')); + $node = new HtmlNode('div'); + $node->addChild($p); + + $this->assertEquals('Please click me!', $node->text(true)); + } + + public function testInnerText() + { + $node = new HtmlNode('div'); + $node->addChild(new TextNode('123 ')); + $anode = new HtmlNode('a'); + $anode->addChild(new TextNode('456789 ')); + $span_node = new HtmlNode('span'); + $span_node->addChild(new TextNode('101112')); + + $node->addChild($anode); + $node->addChild($span_node); + $this->assertEquals($node->innerText, '123 456789 101112'); + } + + public function testTextLookInChildrenAndNoChildren() + { + $p = new HtmlNode('p'); + $a = new HtmlNode('a'); + $a->addChild(new TextNode('click me')); + $p->addChild(new TextNode('Please ')); + $p->addChild($a); + $p->addChild(new TextNode('!')); + + $p->text; + $p->text(true); + + $this->assertEquals('Please click me!', $p->text(true)); + } + + public function testGetAttribute() + { + $node = new HtmlNode('a'); + $node->getTag()->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + 'class' => [ + 'value' => 'outerlink rounded', + 'doubleQuote' => true, + ], + ]); + + $this->assertEquals('outerlink rounded', $node->getAttribute('class')); + } + + public function testGetAttributeMagic() + { + $node = new HtmlNode('a'); + $node->getTag()->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + 'class' => [ + 'value' => 'outerlink rounded', + 'doubleQuote' => true, + ], + ]); + + $this->assertEquals('http://google.com', $node->href); + } + + public function testGetAttributes() + { + $node = new HtmlNode('a'); + $node->getTag()->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + 'class' => [ + 'value' => 'outerlink rounded', + 
'doubleQuote' => true, + ], + ]); + + $this->assertEquals('outerlink rounded', $node->getAttributes()['class']); + } + + public function testSetAttribute() + { + $node = new HtmlNode('a'); + $node->setAttribute('class', 'foo'); + $this->assertEquals('foo', $node->getAttribute('class')); + } + + public function testRemoveAttribute() + { + $node = new HtmlNode('a'); + $node->setAttribute('class', 'foo'); + $node->removeAttribute('class'); + $this->assertnull($node->getAttribute('class')); + } + + public function testRemoveAllAttributes() + { + $node = new HtmlNode('a'); + $node->setAttribute('class', 'foo'); + $node->setAttribute('href', 'http://google.com'); + $node->removeAllAttributes(); + $this->assertEquals(0, \count($node->getAttributes())); + } + + public function testSetTag() + { + $node = new HtmlNode('div'); + $this->assertEquals('
', $node->outerHtml()); + + $node->setTag('p'); + $this->assertEquals('

', $node->outerHtml()); + + $node->setTag(new Tag('span')); + $this->assertEquals('', $node->outerHtml()); + } + + public function testCountable() + { + $div = new Tag('div'); + $div->setAttributes([ + 'class' => [ + 'value' => 'all', + 'doubleQuote' => true, + ], + ]); + $a = new Tag('a'); + $a->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]); + $br = new Tag('br'); + $br->selfClosing(); + + $parent = new HtmlNode($div); + $childa = new HtmlNode($a); + $childbr = new HtmlNode($br); + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new TextNode('link')); + + $this->assertEquals(\count($parent->getChildren()), \count($parent)); + } + + public function testIterator() + { + $div = new Tag('div'); + $div->setAttributes([ + 'class' => [ + 'value' => 'all', + 'doubleQuote' => true, + ], + ]); + $a = new Tag('a'); + $a->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]); + $br = new Tag('br'); + $br->selfClosing(); + + $parent = new HtmlNode($div); + $childa = new HtmlNode($a); + $childbr = new HtmlNode($br); + $parent->addChild($childa); + $parent->addChild($childbr); + $childa->addChild(new TextNode('link')); + + $children = 0; + foreach ($parent as $child) { + ++$children; + } + $this->assertEquals(2, $children); + } + + /** + * @expectedException \PHPHtmlParser\Exceptions\ParentNotFoundException + */ + public function testAncestorByTagFailure() + { + $a = new Tag('a'); + $node = new HtmlNode($a); + $node->ancestorByTag('div'); + } + + public function testReplaceNode() + { + $dom = new Dom(); + $dom->loadStr('

Hey bro, click here
:)

'); + $id = $dom->find('p')[0]->id(); + $newChild = new HtmlNode('h1'); + $dom->find('p')[0]->getParent()->replaceChild($id, $newChild); + $this->assertEquals('

', (string) $dom); + } + + public function testTextNodeFirstChild() + { + $dom = new Dom(); + $dom->loadStr('

Hey bro, click here
:)

'); + $p = $dom->find('p'); + foreach ($p as $element) { + $child = $element->firstChild(); + $this->assertInstanceOf(TextNode::class, $child); + break; + } + } } diff --git a/tests/Node/ParentTest.php b/tests/Node/ParentTest.php old mode 100644 new mode 100755 index c06daa41..8a87aa32 --- a/tests/Node/ParentTest.php +++ b/tests/Node/ParentTest.php @@ -1,132 +1,315 @@ addChild($child); - $this->assertTrue($parent->hasChildren()); - } - - public function testHasChildNoChildren() - { - $node = new Node; - $this->assertFalse($node->hasChildren()); - } - - public function testAddChild() - { - $parent = new Node; - $child = new Node; - $this->assertTrue($parent->addChild($child)); - } - - public function testAddChildTwoParent() - { - $parent = new Node; - $parent2 = new Node; - $child = new Node; - $parent->addChild($child); - $parent2->addChild($child); - $this->assertFalse($parent->hasChildren()); - } - - public function testGetChild() - { - $parent = new Node; - $child = new Node; - $child2 = new Node; - $parent->addChild($child); - $parent->addChild($child2); - $this->assertTrue($parent->getChild($child2->id()) instanceof Node); - } - - public function testRemoveChild() - { - $parent = new Node; - $child = new Node; - $parent->addChild($child); - $parent->removeChild($child->id()); - $this->assertFalse($parent->hasChildren()); - } - - public function testNextChild() - { - $parent = new Node; - $child = new Node; - $child2 = new Node; - $parent->addChild($child); - $parent->addChild($child2); - - $this->assertEquals($child2->id(), $parent->nextChild($child->id())->id()); - } - - public function testNextChildWithRemove() - { - $parent = new Node; - $child = new Node; - $child2 = new Node; - $child3 = new Node; - $parent->addChild($child); - $parent->addChild($child2); - $parent->addChild($child3); - - $parent->removeChild($child2->id()); - $this->assertEquals($child3->id(), $parent->nextChild($child->id())->id()); - } - - public function testPreviousChild() - { - 
$parent = new Node; - $child = new Node; - $child2 = new Node; - $parent->addChild($child); - $parent->addChild($child2); - - $this->assertEquals($child->id(), $parent->previousChild($child2->id())->id()); - } - - public function testPreviousChildWithRemove() - { - $parent = new Node; - $child = new Node; - $child2 = new Node; - $child3 = new Node; - $parent->addChild($child); - $parent->addChild($child2); - $parent->addChild($child3); - - $parent->removeChild($child2->id()); - $this->assertEquals($child->id(), $parent->previousChild($child3->id())->id()); - } - - public function testFirstChild() - { - $parent = new Node; - $child = new Node; - $child2 = new Node; - $child3 = new Node; - $parent->addChild($child); - $parent->addChild($child2); - $parent->addChild($child3); - - $this->assertEquals($child->id(), $parent->firstChild()->id()); - } - - public function testLastChild() - { - $parent = new Node; - $child = new Node; - $child2 = new Node; - $child3 = new Node; - $parent->addChild($child); - $parent->addChild($child2); - $parent->addChild($child3); - - $this->assertEquals($child3->id(), $parent->lastChild()->id()); - } +declare(strict_types=1); +require_once 'tests/data/MockNode.php'; + +use PHPHtmlParser\Dom\Node\MockNode as Node; +use PHPUnit\Framework\TestCase; + +class NodeParentTest extends TestCase +{ + public function testHasChild() + { + $parent = new Node(); + $child = new Node(); + $parent->addChild($child); + $this->assertTrue($parent->hasChildren()); + } + + public function testHasChildNoChildren() + { + $node = new Node(); + $this->assertFalse($node->hasChildren()); + } + + public function testAddChild() + { + $parent = new Node(); + $child = new Node(); + $this->assertTrue($parent->addChild($child)); + } + + public function testAddChildTwoParent() + { + $parent = new Node(); + $parent2 = new Node(); + $child = new Node(); + $parent->addChild($child); + $parent2->addChild($child); + $this->assertFalse($parent->hasChildren()); + } + + public 
function testGetChild() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $parent->addChild($child); + $parent->addChild($child2); + $this->assertTrue($parent->getChild($child2->id()) instanceof Node); + } + + public function testRemoveChild() + { + $parent = new Node(); + $child = new Node(); + $parent->addChild($child); + $parent->removeChild($child->id()); + $this->assertFalse($parent->hasChildren()); + } + + public function testRemoveChildNotExists() + { + $parent = new Node(); + $parent->removeChild(1); + $this->assertFalse($parent->hasChildren()); + } + + public function testNextChild() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $parent->addChild($child); + $parent->addChild($child2); + + $this->assertEquals($child2->id(), $parent->nextChild($child->id())->id()); + } + + public function testHasNextChild() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $parent->addChild($child); + $parent->addChild($child2); + + $this->assertEquals($child2->id(), $parent->hasNextChild($child->id())); + } + + public function testHasNextChildNotExists() + { + $parent = new Node(); + $child = new Node(); + + $this->expectException(\PHPHtmlParser\Exceptions\ChildNotFoundException::class); + $parent->hasNextChild($child->id()); + } + + public function testNextChildWithRemove() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child3 = new Node(); + $parent->addChild($child); + $parent->addChild($child2); + $parent->addChild($child3); + + $parent->removeChild($child2->id()); + $this->assertEquals($child3->id(), $parent->nextChild($child->id())->id()); + } + + public function testPreviousChild() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $parent->addChild($child); + $parent->addChild($child2); + + $this->assertEquals($child->id(), $parent->previousChild($child2->id())->id()); + } + + public function testPreviousChildWithRemove() + { 
+ $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child3 = new Node(); + $parent->addChild($child); + $parent->addChild($child2); + $parent->addChild($child3); + + $parent->removeChild($child2->id()); + $this->assertEquals($child->id(), $parent->previousChild($child3->id())->id()); + } + + public function testFirstChild() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child3 = new Node(); + $parent->addChild($child); + $parent->addChild($child2); + $parent->addChild($child3); + + $this->assertEquals($child->id(), $parent->firstChild()->id()); + } + + public function testLastChild() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child3 = new Node(); + $parent->addChild($child); + $parent->addChild($child2); + $parent->addChild($child3); + + $this->assertEquals($child3->id(), $parent->lastChild()->id()); + } + + public function testInsertBeforeFirst() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child3 = new Node(); + $parent->addChild($child2); + $parent->addChild($child3); + + $parent->insertBefore($child, $child2->id()); + + $this->assertTrue($parent->isChild($child->id())); + $this->assertEquals($parent->firstChild()->id(), $child->id()); + $this->assertEquals($child->nextSibling()->id(), $child2->id()); + $this->assertEquals($child2->nextSibling()->id(), $child3->id()); + $this->assertEquals($parent->lastChild()->id(), $child3->id()); + } + + public function testInsertBeforeLast() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child3 = new Node(); + $parent->addChild($child); + $parent->addChild($child3); + + $parent->insertBefore($child2, $child3->id()); + + $this->assertTrue($parent->isChild($child2->id())); + $this->assertEquals($parent->firstChild()->id(), $child->id()); + $this->assertEquals($child->nextSibling()->id(), $child2->id()); + $this->assertEquals($child2->nextSibling()->id(), $child3->id()); + 
$this->assertEquals($parent->lastChild()->id(), $child3->id()); + } + + public function testInsertAfterFirst() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child3 = new Node(); + $parent->addChild($child); + $parent->addChild($child3); + + $parent->insertAfter($child2, $child->id()); + + $this->assertTrue($parent->isChild($child2->id())); + $this->assertEquals($parent->firstChild()->id(), $child->id()); + $this->assertEquals($child->nextSibling()->id(), $child2->id()); + $this->assertEquals($child2->nextSibling()->id(), $child3->id()); + $this->assertEquals($parent->lastChild()->id(), $child3->id()); + } + + public function testInsertAfterLast() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child3 = new Node(); + $parent->addChild($child); + $parent->addChild($child2); + + $parent->insertAfter($child3, $child2->id()); + + $this->assertTrue($parent->isChild($child2->id())); + $this->assertEquals($parent->firstChild()->id(), $child->id()); + $this->assertEquals($child->nextSibling()->id(), $child2->id()); + $this->assertEquals($child2->nextSibling()->id(), $child3->id()); + $this->assertEquals($parent->lastChild()->id(), $child3->id()); + } + + public function testReplaceChild() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $child3 = new Node(); + $parent->addChild($child); + $parent->addChild($child2); + $parent->replaceChild($child->id(), $child3); + + $this->assertFalse($parent->isChild($child->id())); + } + + /** + * @expectedException \PHPHtmlParser\Exceptions\CircularException + */ + public function testSetParentDescendantException() + { + $parent = new Node(); + $child = new Node(); + $parent->addChild($child); + $parent->setParent($child); + } + + /** + * @expectedException \PHPHtmlParser\Exceptions\CircularException + */ + public function testAddChildAncestorException() + { + $parent = new Node(); + $child = new Node(); + $parent->addChild($child); + 
$child->addChild($parent); + } + + /** + * @expectedException \PHPHtmlParser\Exceptions\CircularException + */ + public function testAddItselfAsChild() + { + $parent = new Node(); + $parent->addChild($parent); + } + + public function testIsAncestorParent() + { + $parent = new Node(); + $child = new Node(); + $parent->addChild($child); + $this->assertTrue($child->isAncestor($parent->id())); + } + + public function testGetAncestor() + { + $parent = new Node(); + $child = new Node(); + $parent->addChild($child); + $ancestor = $child->getAncestor($parent->id()); + $this->assertEquals($parent->id(), $ancestor->id()); + } + + public function testGetGreatAncestor() + { + $parent = new Node(); + $child = new Node(); + $child2 = new Node(); + $parent->addChild($child); + $child->addChild($child2); + $ancestor = $child2->getAncestor($parent->id()); + $this->assertNotNull($ancestor); + $this->assertEquals($parent->id(), $ancestor->id()); + } + + public function testGetAncestorNotFound() + { + $parent = new Node(); + $ancestor = $parent->getAncestor(1); + $this->assertNull($ancestor); + } } diff --git a/tests/Node/TagTest.php b/tests/Node/TagTest.php old mode 100644 new mode 100755 index 73109f43..f14de7e2 --- a/tests/Node/TagTest.php +++ b/tests/Node/TagTest.php @@ -1,132 +1,194 @@ selfClosing(); - $this->assertTrue($tag->isSelfClosing()); - } - - public function testSetAttributes() - { - $attr = [ - 'href' => [ - 'value' => 'http://google.com', - 'doublequote' => false, - ], - ]; - - $tag = new Tag('a'); - $tag->setAttributes($attr); - $this->assertEquals('http://google.com', $tag->getAttribute('href')['value']); - } - - public function testSetAttributeNoArray() - { - $tag = new Tag('a'); - $tag->setAttribute('href', 'http://google.com'); - $this->assertEquals('http://google.com', $tag->getAttribute('href')['value']); - } - - public function testSetAttributesNoDoubleArray() - { - $attr = [ - 'href' => 'http://google.com', - 'class' => 'funtimes', - ]; - - $tag = new 
Tag('a'); - $tag->setAttributes($attr); - $this->assertEquals('funtimes', $tag->class['value']); - } - - public function testNoise() - { - $tag = new Tag('a'); - $this->assertTrue($tag->noise('noise') instanceof Tag); - } - - public function testGetAttributeMagic() - { - $attr = [ - 'href' => [ - 'value' => 'http://google.com', - 'doublequote' => false, - ], - ]; - - $tag = new Tag('a'); - $tag->setAttributes($attr); - $this->assertEquals('http://google.com', $tag->href['value']); - } - - public function testSetAttributeMagic() - { - $tag = new Tag('a'); - $tag->href = 'http://google.com'; - $this->assertEquals('http://google.com', $tag->href['value']); - } - - public function testMakeOpeningTag() - { - $attr = [ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => true, - ], - ]; - - $tag = new Tag('a'); - $tag->setAttributes($attr); - $this->assertEquals('', $tag->makeOpeningTag()); - } - - public function testMakeOpeningTagEmptyAttr() - { - $attr = [ - 'href' => [ - 'value' => 'http://google.com', - 'doubleQuote' => true, - ], - ]; - - $tag = new Tag('a'); - $tag->setAttributes($attr); - $tag->selected = [ - 'value' => null, - ]; - $this->assertEquals('', $tag->makeOpeningTag()); - } - - public function testMakeOpeningTagSelfClosing() - { - $attr = [ - 'class' => [ - 'value' => 'clear-fix', - 'doubleQuote' => true, - ], - ]; - - $tag = new Tag('div'); - $tag->selfClosing() - ->setAttributes($attr); - $this->assertEquals('
', $tag->makeOpeningTag()); - } - - public function testMakeClosingTag() - { - $tag = new Tag('a'); - $this->assertEquals('', $tag->makeClosingTag()); - } - - public function testMakeClosingTagSelfClosing() - { - $tag = new Tag('div'); - $tag->selfClosing(); - $this->assertEmpty($tag->makeClosingTag()); - } +use PHPHtmlParser\Dom\Tag; +use PHPUnit\Framework\TestCase; + +class NodeTagTest extends TestCase +{ + public function testSelfClosing() + { + $tag = new Tag('a'); + $tag->selfClosing(); + $this->assertTrue($tag->isSelfClosing()); + } + + public function testSetAttributes() + { + $attr = [ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]; + + $tag = new Tag('a'); + $tag->setAttributes($attr); + $this->assertEquals('http://google.com', $tag->getAttribute('href')->getValue()); + } + + /** + * @expectedException \PHPHtmlParser\Exceptions\Tag\AttributeNotFoundException + */ + public function testRemoveAttribute() + { + $tag = new Tag('a'); + $tag->setAttribute('href', 'http://google.com'); + $tag->removeAttribute('href'); + $tag->getAttribute('href'); + } + + public function testRemoveAllAttributes() + { + $tag = new Tag('a'); + $tag->setAttribute('href', 'http://google.com'); + $tag->setAttribute('class', 'clear-fix', true); + $tag->removeAllAttributes(); + $this->assertEquals(0, \count($tag->getAttributes())); + } + + public function testSetAttributeNoArray() + { + $tag = new Tag('a'); + $tag->setAttribute('href', 'http://google.com'); + $this->assertEquals('http://google.com', $tag->getAttribute('href')->getValue()); + } + + public function testSetAttributesNoDoubleArray() + { + $attr = [ + 'href' => 'http://google.com', + 'class' => 'funtimes', + ]; + + $tag = new Tag('a'); + $tag->setAttributes($attr); + $this->assertEquals('funtimes', $tag->getAttribute('class')->getValue()); + } + + public function testUpdateAttributes() + { + $tag = new Tag('a'); + $tag->setAttributes([ + 'href' => [ + 'value' => 'http://google.com', + 
'doubleQuote' => false, + ], + 'class' => [ + 'value' => null, + 'doubleQuote' => true, + ], + ]); + + $this->assertEquals(null, $tag->getAttribute('class')->getValue()); + $this->assertEquals('http://google.com', $tag->getAttribute('href')->getValue()); + + $attr = [ + 'href' => 'https://www.google.com', + 'class' => 'funtimes', + ]; + + $tag->setAttributes($attr); + $this->assertEquals('funtimes', $tag->getAttribute('class')->getValue()); + $this->assertEquals('https://www.google.com', $tag->getAttribute('href')->getValue()); + } + + public function testNoise() + { + $tag = new Tag('a'); + $this->assertTrue($tag->noise('noise') instanceof Tag); + } + + public function testGetAttributeMagic() + { + $attr = [ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => false, + ], + ]; + + $tag = new Tag('a'); + $tag->setAttributes($attr); + $this->assertEquals('http://google.com', $tag->getAttribute('href')->getValue()); + } + + public function testSetAttributeMagic() + { + $tag = new Tag('a'); + $tag->setAttribute('href', 'http://google.com'); + $this->assertEquals('http://google.com', $tag->getAttribute('href')->getValue()); + } + + public function testMakeOpeningTag() + { + $attr = [ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => true, + ], + ]; + + $tag = new Tag('a'); + $tag->setAttributes($attr); + $this->assertEquals('', $tag->makeOpeningTag()); + } + + public function testMakeOpeningTagEmptyAttr() + { + $attr = [ + 'href' => [ + 'value' => 'http://google.com', + 'doubleQuote' => true, + ], + ]; + + $tag = new Tag('a'); + $tag->setAttributes($attr); + $tag->setAttribute('selected', null); + $this->assertEquals('', $tag->makeOpeningTag()); + } + + public function testMakeOpeningTagSelfClosing() + { + $attr = [ + 'class' => [ + 'value' => 'clear-fix', + 'doubleQuote' => true, + ], + ]; + + $tag = (new Tag('div')) + ->selfClosing() + ->setAttributes($attr); + $this->assertEquals('
', $tag->makeOpeningTag()); + } + + public function testMakeClosingTag() + { + $tag = new Tag('a'); + $this->assertEquals('', $tag->makeClosingTag()); + } + + public function testMakeClosingTagSelfClosing() + { + $tag = new Tag('div'); + $tag->selfClosing(); + $this->assertEmpty($tag->makeClosingTag()); + } + + public function testSetTagAttribute() + { + $tag = new Tag('div'); + $tag->setStyleAttributeValue('display', 'none'); + $this->assertEquals('display:none;', $tag->getAttribute('style')->getValue()); + } + + public function testGetStyleAttributesArray() + { + $tag = new Tag('div'); + $tag->setStyleAttributeValue('display', 'none'); + $this->assertInternalType('array', $tag->getStyleAttributeArray()); + } } diff --git a/tests/Node/TextTest.php b/tests/Node/TextTest.php old mode 100644 new mode 100755 index a07116d0..f94c4962 --- a/tests/Node/TextTest.php +++ b/tests/Node/TextTest.php @@ -1,25 +1,78 @@ assertEquals('foo bar', $node->text()); - } - - public function testGetTag() - { - $node = new TextNode('foo bar'); - $this->assertEquals('text', $node->getTag()->name()); - } - - public function testAncestorByTag() - { - $node = new TextNode('foo bar'); - $text = $node->ancestorByTag('text'); - $this->assertEquals($node, $text); - } +declare(strict_types=1); + +use PHPHtmlParser\Dom; +use PHPHtmlParser\Dom\Node\TextNode; +use PHPHtmlParser\Options; +use PHPUnit\Framework\TestCase; +use stringEncode\Encode; + +class NodeTextTest extends TestCase +{ + public function testText() + { + $node = new TextNode('foo bar'); + $this->assertEquals('foo bar', $node->text()); + } + + public function testGetTag() + { + $node = new TextNode('foo bar'); + $this->assertEquals('text', $node->getTag()->name()); + } + + public function testAncestorByTag() + { + $node = new TextNode('foo bar'); + $text = $node->ancestorByTag('text'); + $this->assertEquals($node, $text); + } + + public function testPreserveEntity() + { + $node = new TextNode('i'); + $text = $node->outerhtml; + 
$this->assertEquals('i', $text); + } + + public function testIsTextNode() + { + $node = new TextNode('text'); + $this->assertEquals(true, $node->isTextNode()); + } + + public function testTextInTextNode() + { + $node = new TextNode('foo bar'); + $this->assertEquals('foo bar', $node->outerHtml()); + } + + public function testSetTextToTextNode() + { + $node = new TextNode(''); + $node->setText('foo bar'); + $this->assertEquals('foo bar', $node->innerHtml()); + } + + public function testSetText() + { + $dom = new Dom(); + $dom->loadStr('

Hey bro, click here
:)

'); + $a = $dom->find('a')[0]; + $a->firstChild()->setText('biz baz'); + $this->assertEquals('

Hey bro, biz baz
:)

', (string) $dom); + } + + public function testSetTextEncoded() + { + $encode = new Encode(); + $encode->from('UTF-8'); + $encode->to('UTF-8'); + + $node = new TextNode('foo bar'); + $node->propagateEncoding($encode); + $node->setText('biz baz'); + $this->assertEquals('biz baz', $node->text()); + } } diff --git a/tests/Options/CleanupTest.php b/tests/Options/CleanupTest.php new file mode 100755 index 00000000..914078ac --- /dev/null +++ b/tests/Options/CleanupTest.php @@ -0,0 +1,83 @@ +setOptions((new Options())->setCleanupInput(true)); + $dom->loadFromFile('tests/data/files/big.html'); + $this->assertEquals(0, \count($dom->find('style'))); + $this->assertEquals(0, \count($dom->find('script'))); + } + + public function testCleanupInputFalse() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setCleanupInput(false)); + $dom->loadFromFile('tests/data/files/big.html'); + $this->assertEquals(1, \count($dom->find('style'))); + $this->assertEquals(22, \count($dom->find('script'))); + } + + public function testRemoveStylesTrue() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setRemoveStyles(true)); + $dom->loadFromFile('tests/data/files/big.html'); + $this->assertEquals(0, \count($dom->find('style'))); + } + + public function testRemoveStylesFalse() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setRemoveStyles(false)); + $dom->loadFromFile('tests/data/files/big.html'); + $this->assertEquals(1, \count($dom->find('style'))); + $this->assertEquals('text/css', + $dom->find('style')->getAttribute('type')); + } + + public function testRemoveScriptsTrue() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setRemoveScripts(true)); + $dom->loadFromFile('tests/data/files/big.html'); + $this->assertEquals(0, \count($dom->find('script'))); + } + + public function testRemoveScriptsFalse() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setRemoveScripts(false)); + $dom->loadFromFile('tests/data/files/big.html'); + 
$this->assertEquals(22, \count($dom->find('script'))); + $this->assertEquals('text/javascript', + $dom->find('script')->getAttribute('type')); + } + + public function testSmartyScripts() + { + $dom = new Dom(); + $dom->loadStr(' + aa={123} + '); + $this->assertEquals(' aa= ', $dom->innerHtml); + } + + public function testSmartyScriptsDisabled() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setRemoveSmartyScripts(false)); + $dom->loadStr(' + aa={123} + '); + $this->assertEquals(' aa={123} ', $dom->innerHtml); + } +} diff --git a/tests/Options/NoSlashTest.php b/tests/Options/NoSlashTest.php new file mode 100644 index 00000000..93370b56 --- /dev/null +++ b/tests/Options/NoSlashTest.php @@ -0,0 +1,45 @@ +setOptions((new Options())->addNoSlashTag('br')); + + $dom->loadStr('

Hey bro, click here

'); + $this->assertEquals('

Hey bro, click here

', $dom->find('div', 0)->innerHtml); + } + + public function testLoadClosingTagOnSelfClosingRemoveNoSlash() + { + $dom = new Dom(); + $dom->setOptions( + (new Options()) + ->addNoSlashTag('br') + ->removeNoSlashTag('br') + ); + + $dom->loadStr('

Hey bro, click here

'); + $this->assertEquals('

Hey bro, click here

', $dom->find('div', 0)->innerHtml); + } + + public function testLoadClosingTagOnSelfClosingClearNoSlash() + { + $dom = new Dom(); + $dom->setOptions( + (new Options()) + ->addNoSlashTag('br') + ->clearNoSlashTags() + ); + + $dom->loadStr('

Hey bro, click here

'); + $this->assertEquals('

Hey bro, click here

', $dom->find('div', 0)->innerHtml); + } +} diff --git a/tests/Options/PreserveLineBreaks.php b/tests/Options/PreserveLineBreaks.php new file mode 100755 index 00000000..be396490 --- /dev/null +++ b/tests/Options/PreserveLineBreaks.php @@ -0,0 +1,31 @@ +setOptions((new Options())->setPreserveLineBreaks(true)); + + $dom->loadStr('
+
'); + + $this->assertEquals("
\n
", (string) $dom); + } + + public function testPreserveLineBreakBeforeClosingTag() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setPreserveLineBreaks(true)); + $dom->loadStr('
'); + + $this->assertEquals('
', (string) $dom); + } +} diff --git a/tests/Options/SelfClosingTest.php b/tests/Options/SelfClosingTest.php new file mode 100644 index 00000000..f1349821 --- /dev/null +++ b/tests/Options/SelfClosingTest.php @@ -0,0 +1,45 @@ +setOptions((new Options())->addSelfClosingTag('mytag')); + $dom->loadStr('

Hey bro, click here

'); + $this->assertEquals('

Hey bro, click here

', $dom->find('div', 0)->innerHtml); + } + + public function testLoadClosingTagAddSelfClosingTagArray() + { + $dom = new Dom(); + $dom->setOptions((new Options())->addSelfClosingTags([ + 'mytag', + 'othertag', + ])); + $dom->loadStr('

Hey bro, click here

'); + $this->assertEquals('

Hey bro, click here

', $dom->find('div', 0)->innerHtml); + } + + public function testLoadClosingTagRemoveSelfClosingTag() + { + $dom = new Dom(); + $dom->setOptions((new Options())->removeSelfClosingTag('br')); + $dom->loadStr('

Hey bro, click here

'); + $this->assertEquals('

Hey bro, click here


', $dom->find('div', 0)->innerHtml); + } + + public function testLoadClosingTagClearSelfClosingTag() + { + $dom = new Dom(); + $dom->setOptions((new Options())->clearSelfClosingTags()); + $dom->loadStr('

Hey bro, click here

'); + $this->assertEquals('

Hey bro, click here


', $dom->find('div', 0)->innerHtml); + } +} diff --git a/tests/Options/StrictTest.php b/tests/Options/StrictTest.php old mode 100644 new mode 100755 index ad01f6b8..709f292d --- a/tests/Options/StrictTest.php +++ b/tests/Options/StrictTest.php @@ -1,55 +1,55 @@ setOptions((new Options())->setStrict(true)); + $dom->loadStr('

Hey you

Ya you!

'); + $this->assertEquals(' ', $dom->getElementById('hey')->nextSibling()->text); + } - public function testConfigStrict() - { - $dom = new Dom; - $dom->setOptions([ - 'strict' => true, - ]); - $dom->load('

Hey you

Ya you!

'); - $this->assertEquals(' ', $dom->getElementById('hey')->nextSibling()->text); - } + public function testConfigStrictMissingSelfClosing() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setStrict(true)); + try { + // should throw an exception + $dom->loadStr('

Hey you


Ya you!

'); + // we should not get here + $this->assertTrue(false); + } catch (StrictException $e) { + $this->assertEquals("Tag 'br' is not self closing! (character #31)", $e->getMessage()); + } + } - public function testConfigStrictMissingSelfClosing() - { - $dom = new Dom; - $dom->setOptions([ - 'strict' => true, - ]); - try - { - // should throw an exception - $dom->load('

Hey you


Ya you!

'); - // we should not get here - $this->assertTrue(false); - } - catch (StrictException $e) - { - $this->assertEquals("Tag 'br' is not self closing! (character #31)", $e->getMessage()); - } - } + public function testConfigStrictMissingAttribute() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setStrict(true)); + try { + // should throw an exception + $dom->loadStr('

Hey you

Ya you!

'); + // we should not get here + $this->assertTrue(false); + } catch (StrictException $e) { + $this->assertEquals("Tag 'p' has an attribute 'block' with out a value! (character #22)", $e->getMessage()); + } + } - public function testConfigStrictMissingAttribute() - { - $dom = new Dom; - $dom->setOptions([ - 'strict' => true, - ]); - try - { - // should throw an exception - $dom->load('

Hey you

Ya you!

'); - // we should not get here - $this->assertTrue(false); - } - catch (StrictException $e) - { - $this->assertEquals("Tag 'p' has an attribute 'block' with out a value! (character #22)", $e->getMessage()); - } - } + public function testConfigStrictBRTag() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setStrict(true)); + $dom->loadStr('
'); + $this->assertTrue(true); + } } diff --git a/tests/Options/WhitespaceTextNodeTest.php b/tests/Options/WhitespaceTextNodeTest.php old mode 100644 new mode 100755 index c72c3df1..245ef7f0 --- a/tests/Options/WhitespaceTextNodeTest.php +++ b/tests/Options/WhitespaceTextNodeTest.php @@ -1,28 +1,26 @@ setOptions([ - 'whitespaceTextNode' => false, - ]); - $dom->load('

Hey you

Ya you!

'); - $this->assertEquals('Ya you!', $dom->getElementById('hey')->nextSibling()->text); - } +class WhitespaceTextNodeTest extends TestCase +{ + public function testConfigGlobalNoWhitespaceTextNode() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setWhitespaceTextNode(false)); + $dom->loadStr('

Hey you

Ya you!

'); + $this->assertEquals('Ya you!', $dom->getElementById('hey')->nextSibling()->text); + } - public function testConfigLocalOverride() - { - $dom = new Dom; - $dom->setOptions([ - 'whitespaceTextNode' => false, - ]); - $dom->load('

Hey you

Ya you!

', [ - 'whitespaceTextNode' => true, - ]); - $this->assertEquals(' ', $dom->getElementById('hey')->nextSibling()->text); - } + public function testConfigLocalOverride() + { + $dom = new Dom(); + $dom->setOptions((new Options())->setWhitespaceTextNode(false)); + $dom->loadStr('

Hey you

Ya you!

', (new Options())->setWhitespaceTextNode(true)); + $this->assertEquals(' ', $dom->getElementById('hey')->nextSibling()->text); + } } diff --git a/tests/OptionsTest.php b/tests/OptionsTest.php old mode 100644 new mode 100755 index b5398c16..43dd6fb6 --- a/tests/OptionsTest.php +++ b/tests/OptionsTest.php @@ -1,43 +1,72 @@ assertTrue($options->whitespaceTextNode); - } - - public function testAddingOption() - { - $options = new Options; - $options->setOptions([ - 'test' => true, - ]); - - $this->assertTrue($options->test); - } - - public function testAddingOver() - { - $options = new Options; - $options->setOptions([ - 'test' => false, - ])->setOptions([ - 'test' => true, - 'whitespaceTextNode' => false, - ]); - - $this->assertFalse($options->get('whitespaceTextNode')); - } - - public function testGettingNoOption() - { - $options = new Options; - $this->assertEquals(null, $options->get('doesnotexist')); - } -} +class OptionsTest extends TestCase +{ + public function testDefaultWhitespaceTextNode() + { + $options = new Options(); + + $this->assertTrue($options->isWhitespaceTextNode()); + } + + public function testSettingOption() + { + $options = new Options(); + $options->setStrict(true); + + $this->assertTrue($options->isStrict()); + } + + public function testOverwritingOption() + { + $options = new Options(); + $options->setStrict(false); + $options2 = new Options(); + $options2->setStrict(true); + $options2->setWhitespaceTextNode(false); + $options = $options->setFromOptions($options2); + + $this->assertTrue($options->isStrict()); + $this->assertFalse($options->isWhitespaceTextNode()); + } + + public function testSetters() + { + $options = new Options(); + + $options->setWhitespaceTextNode(true); + $this->assertTrue($options->isWhitespaceTextNode()); + $options->setStrict(true); + $this->assertTrue($options->isStrict()); + + $options->setEnforceEncoding('utf8'); + $this->assertEquals('utf8', $options->getEnforceEncoding()); + + $options->setCleanupInput(true); + 
$this->assertTrue($options->isCleanupInput()); + + $options->setRemoveScripts(true); + $this->assertTrue($options->isRemoveScripts()); + + $options->setRemoveStyles(true); + $this->assertTrue($options->isRemoveStyles()); + + $options->setPreserveLineBreaks(true); + $this->assertTrue($options->isPreserveLineBreaks()); + + $options->setRemoveDoubleSpace(true); + $this->assertTrue($options->isRemoveDoubleSpace()); + + $options->setRemoveSmartyScripts(true); + $this->assertTrue($options->isRemoveSmartyScripts()); + + $options->setHtmlSpecialCharsDecode(true); + $this->assertTrue($options->isHtmlSpecialCharsDecode()); + } +} diff --git a/tests/Selector/SeekerTest.php b/tests/Selector/SeekerTest.php new file mode 100644 index 00000000..d9e0e824 --- /dev/null +++ b/tests/Selector/SeekerTest.php @@ -0,0 +1,25 @@ +seek([], $ruleDTO, []); + $this->assertCount(0, $results); + } +} diff --git a/tests/Selector/SelectorTest.php b/tests/Selector/SelectorTest.php new file mode 100755 index 00000000..9ef2705d --- /dev/null +++ b/tests/Selector/SelectorTest.php @@ -0,0 +1,235 @@ +getParsedSelectorCollectionDTO(); + $this->assertEquals('id', $selectors->getParsedSelectorDTO()[0]->getRules()[0]->getKey()); + } + + public function testParseSelectorStringClass() + { + $selector = new Selector('div.post', new Parser()); + $selectors = $selector->getParsedSelectorCollectionDTO(); + $this->assertEquals('class', $selectors->getParsedSelectorDTO()[0]->getRules()[0]->getKey()); + } + + public function testParseSelectorStringAttribute() + { + $selector = new Selector('div[visible=yes]', new Parser()); + $selectors = $selector->getParsedSelectorCollectionDTO(); + $this->assertEquals('yes', $selectors->getParsedSelectorDTO()[0]->getRules()[0]->getValue()); + } + + public function testParseSelectorStringNoKey() + { + $selector = new Selector('div[!visible]', new Parser()); + $selectors = $selector->getParsedSelectorCollectionDTO(); + 
$this->assertTrue($selectors->getParsedSelectorDTO()[0]->getRules()[0]->isNoKey()); + } + + public function testFind() + { + $root = new HtmlNode('root'); + $parent = new HtmlNode('div'); + $child1 = new HtmlNode('a'); + $child2 = new HtmlNode('p'); + $parent->addChild($child1); + $parent->addChild($child2); + $root->addChild($parent); + + $selector = new Selector('div a', new Parser()); + $this->assertEquals($child1->id(), $selector->find($root)[0]->id()); + } + + public function testFindId() + { + $parent = new HtmlNode(new Tag('div')); + $child1 = new HtmlNode(new Tag('a')); + $child2 = new HtmlNode(new Tag('p')); + $child2->getTag()->setAttributes([ + 'id' => [ + 'value' => 'content', + 'doubleQuote' => true, + ], + ]); + $parent->addChild($child1); + $parent->addChild($child2); + + $selector = new Selector('#content', new Parser()); + $this->assertEquals($child2->id(), $selector->find($parent)[0]->id()); + } + + public function testFindClass() + { + $parent = new HtmlNode(new Tag('div')); + $child1 = new HtmlNode(new Tag('a')); + $child2 = new HtmlNode(new Tag('p')); + $child3 = new HtmlNode('a'); + $child3->getTag()->setAttributes([ + 'class' => [ + 'value' => 'link', + 'doubleQuote' => true, + ], + ]); + $parent->addChild($child1); + $parent->addChild($child2); + $parent->addChild($child3); + + $selector = new Selector('.link', new Parser()); + $this->assertEquals($child3->id(), $selector->find($parent)[0]->id()); + } + + public function testFindClassMultiple() + { + $parent = new HtmlNode(new Tag('div')); + $child1 = new HtmlNode(new Tag('a')); + $child2 = new HtmlNode(new Tag('p')); + $child3 = new HtmlNode(new Tag('a')); + $child3->getTag()->setAttributes([ + 'class' => [ + 'value' => 'link outer', + 'doubleQuote' => false, + ], + ]); + $parent->addChild($child1); + $parent->addChild($child2); + $parent->addChild($child3); + + $selector = new Selector('.outer', new Parser()); + $this->assertEquals($child3->id(), $selector->find($parent)[0]->id()); + } + + 
public function testFindWild() + { + $root = new HtmlNode(new Tag('root')); + $parent = new HtmlNode(new Tag('div')); + $child1 = new HtmlNode(new Tag('a')); + $child2 = new HtmlNode(new Tag('p')); + $child3 = new HtmlNode(new Tag('a')); + $root->addChild($parent); + $parent->addChild($child1); + $parent->addChild($child2); + $child2->addChild($child3); + + $selector = new Selector('div * a', new Parser()); + $this->assertEquals($child3->id(), $selector->find($root)[0]->id()); + } + + public function testFindMultipleSelectors() + { + $root = new HtmlNode(new Tag('root')); + $parent = new HtmlNode(new Tag('div')); + $child1 = new HtmlNode(new Tag('a')); + $child2 = new HtmlNode(new Tag('p')); + $child3 = new HtmlNode(new Tag('a')); + $root->addChild($parent); + $parent->addChild($child1); + $parent->addChild($child2); + $child2->addChild($child3); + + $selector = new Selector('a, p', new Parser()); + $this->assertEquals(3, \count($selector->find($root))); + } + + public function testFindXpathKeySelector() + { + $parent = new HtmlNode(new Tag('div')); + $child1 = new HtmlNode(new Tag('a')); + $child2 = new HtmlNode(new Tag('p')); + $child3 = new HtmlNode(new Tag('a')); + $child3->getTag()->setAttributes([ + 'class' => [ + 'value' => 'link outer', + 'doubleQuote' => false, + ], + ]); + $parent->addChild($child1); + $parent->addChild($child2); + $parent->addChild($child3); + + $selector = new Selector('div[1]', new Parser()); + $this->assertEquals($parent->id(), $selector->find($parent)[0]->id()); + } + + public function testFindChildMultipleLevelsDeep() + { + $root = new HtmlNode(new Tag('root')); + $parent = new HtmlNode(new Tag('div')); + $child1 = new HtmlNode(new Tag('ul')); + $child2 = new HtmlNode(new Tag('li')); + $root->addChild($parent); + $parent->addChild($child1); + $child1->addChild($child2); + + $selector = new Selector('div li', new Parser()); + $this->assertEquals(1, \count($selector->find($root))); + } + + public function testFindAllChildren() + { + 
$root = new HtmlNode(new Tag('root')); + $parent = new HtmlNode(new Tag('div')); + $child1 = new HtmlNode(new Tag('ul')); + $child2 = new HtmlNode(new Tag('span')); + $child3 = new HtmlNode(new Tag('ul')); + $root->addChild($parent); + $parent->addChild($child1); + $child2->addChild($child3); + $parent->addChild($child2); + + $selector = new Selector('div ul', new Parser()); + $this->assertEquals(2, \count($selector->find($root))); + } + + public function testFindChildUsingChildSelector() + { + $root = new HtmlNode(new Tag('root')); + $parent = new HtmlNode(new Tag('div')); + $child1 = new HtmlNode(new Tag('ul')); + $child2 = new HtmlNode(new Tag('span')); + $child3 = new HtmlNode(new Tag('ul')); + $root->addChild($parent); + $parent->addChild($child1); + $child2->addChild($child3); + $parent->addChild($child2); + + $selector = new Selector('div > ul', new Parser()); + $this->assertEquals(1, \count($selector->find($root))); + } + + public function testFindNodeByAttributeOnly() + { + $root = new HtmlNode(new Tag('root')); + $child1 = new HtmlNode(new Tag('ul')); + $child1->setAttribute('custom-attr', null); + $root->addChild($child1); + + $selector = new Selector('[custom-attr]', new Parser()); + $this->assertEquals(1, \count($selector->find($root))); + } + + public function testFindMultipleClasses() + { + $root = new HtmlNode(new Tag('root')); + $child1 = new HtmlNode(new Tag('a')); + $child1->setAttribute('class', 'b'); + $child2 = new HtmlNode(new Tag('a')); + $child2->setAttribute('class', 'b c'); + $root->addChild($child1); + $root->addChild($child2); + + $selector = new Selector('a.b.c', new Parser()); + $this->assertEquals(1, \count($selector->find($root))); + } +} diff --git a/tests/SelectorTest.php b/tests/SelectorTest.php deleted file mode 100644 index fe06568a..00000000 --- a/tests/SelectorTest.php +++ /dev/null @@ -1,206 +0,0 @@ -getSelectors(); - $this->assertEquals('id', $selectors[0][0]['key']); - } - - public function testParseSelectorStringClass() - 
{ - $selector = new Selector('div.post'); - $selectors = $selector->getSelectors(); - $this->assertEquals('class', $selectors[0][0]['key']); - } - - public function testParseSelectorStringAttribute() - { - $selector = new Selector('div[visible=yes]'); - $selectors = $selector->getSelectors(); - $this->assertEquals('yes', $selectors[0][0]['value']); - } - - public function testParseSelectorStringNoKey() - { - $selector = new Selector('div[!visible]'); - $selectors = $selector->getSelectors(); - $this->assertTrue($selectors[0][0]['noKey']); - } - - public function testFind() - { - $root = new HtmlNode('root'); - $parent = new HtmlNode('div'); - $child1 = new HtmlNode('a'); - $child2 = new HtmlNode('p'); - $parent->addChild($child1); - $parent->addChild($child2); - $root->addChild($parent); - - $selector = new Selector('div a'); - $this->assertEquals($child1->id(), $selector->find($root)[0]->id()); - } - - public function testFindId() - { - $parent = new HtmlNode(new Tag('div')); - $child1 = new HtmlNode(new Tag('a')); - $child2 = new HtmlNode(new Tag('p')); - $child2->getTag()->setAttributes([ - 'id' => [ - 'value' => 'content', - 'doubleQuote' => true, - ], - ]); - $parent->addChild($child1); - $parent->addChild($child2); - - $selector = new Selector('#content'); - $this->assertEquals($child2->id(), $selector->find($parent)[0]->id()); - } - - public function testFindClass() - { - $parent = new HtmlNode(new Tag('div')); - $child1 = new HtmlNode(new Tag('a')); - $child2 = new HtmlNode(new Tag('p')); - $child3 = new HtmlNode('a'); - $child3->getTag()->setAttributes([ - 'class' => [ - 'value' => 'link', - 'doubleQuote' => true, - ], - ]); - $parent->addChild($child1); - $parent->addChild($child2); - $parent->addChild($child3); - - $selector = new Selector('.link'); - $this->assertEquals($child3->id(), $selector->find($parent)[0]->id()); - } - - public function testFindClassMultiple() - { - $parent = new HtmlNode(new Tag('div')); - $child1 = new HtmlNode(new Tag('a')); - 
$child2 = new HtmlNode(new Tag('p')); - $child3 = new HtmlNode(new Tag('a')); - $child3->getTag()->setAttributes([ - 'class' => [ - 'value' => 'link outer', - 'doubleQuote' => false, - ], - ]); - $parent->addChild($child1); - $parent->addChild($child2); - $parent->addChild($child3); - - $selector = new Selector('.outer'); - $this->assertEquals($child3->id(), $selector->find($parent)[0]->id()); - } - - public function testFindWild() - { - $root = new HtmlNode(new Tag('root')); - $parent = new HtmlNode(new Tag('div')); - $child1 = new HtmlNode(new Tag('a')); - $child2 = new HtmlNode(new Tag('p')); - $child3 = new HtmlNode(new Tag('a')); - $root->addChild($parent); - $parent->addChild($child1); - $parent->addChild($child2); - $child2->addChild($child3); - - $selector = new Selector('div * a'); - $this->assertEquals($child3->id(), $selector->find($root)[0]->id()); - } - - public function testFindMultipleSelectors() - { - $root = new HtmlNode(new Tag('root')); - $parent = new HtmlNode(new Tag('div')); - $child1 = new HtmlNode(new Tag('a')); - $child2 = new HtmlNode(new Tag('p')); - $child3 = new HtmlNode(new Tag('a')); - $root->addChild($parent); - $parent->addChild($child1); - $parent->addChild($child2); - $child2->addChild($child3); - - $selector = new Selector('a, p'); - $this->assertEquals(3, count($selector->find($root))); - } - - public function testFindXpathKeySelector() - { - $parent = new HtmlNode(new Tag('div')); - $child1 = new HtmlNode(new Tag('a')); - $child2 = new HtmlNode(new Tag('p')); - $child3 = new HtmlNode(new Tag('a')); - $child3->getTag()->setAttributes([ - 'class' => [ - 'value' => 'link outer', - 'doubleQuote' => false, - ], - ]); - $parent->addChild($child1); - $parent->addChild($child2); - $parent->addChild($child3); - - $selector = new Selector('div[1]'); - $this->assertEquals($parent->id(), $selector->find($parent)[0]->id()); - } - - public function testFindChildMultipleLevelsDeep() - { - $root = new HtmlNode(new Tag('root')); - $parent = new 
HtmlNode(new Tag('div')); - $child1 = new HtmlNode(new Tag('ul')); - $child2 = new HtmlNode(new Tag('li')); - $root->addChild($parent); - $parent->addChild($child1); - $child1->addChild($child2); - - $selector = new Selector('div li'); - $this->assertEquals(1, count($selector->find($root))); - } - - public function testFindAllChildren() - { - $root = new HtmlNode(new Tag('root')); - $parent = new HtmlNode(new Tag('div')); - $child1 = new HtmlNode(new Tag('ul')); - $child2 = new HtmlNode(new Tag('span')); - $child3 = new HtmlNode(new Tag('ul')); - $root->addChild($parent); - $parent->addChild($child1); - $child2->addChild($child3); - $parent->addChild($child2); - - $selector = new Selector('div ul'); - $this->assertEquals(2, count($selector->find($root))); - } - - public function testFindChildUsingChildSelector() - { - $root = new HtmlNode(new Tag('root')); - $parent = new HtmlNode(new Tag('div')); - $child1 = new HtmlNode(new Tag('ul')); - $child2 = new HtmlNode(new Tag('span')); - $child3 = new HtmlNode(new Tag('ul')); - $root->addChild($parent); - $parent->addChild($child1); - $child2->addChild($child3); - $parent->addChild($child2); - - $selector = new Selector('div > ul'); - $this->assertEquals(1, count($selector->find($root))); - } -} diff --git a/tests/StaticDomTest.php b/tests/StaticDomTest.php old mode 100644 new mode 100755 index ac0160c8..73453633 --- a/tests/StaticDomTest.php +++ b/tests/StaticDomTest.php @@ -1,57 +1,79 @@ assertTrue($status); + } + + public function testloadStr() + { + $dom = Dom::loadStr('

Hey bro, click here
:)

'); + $div = $dom->find('div', 0); + $this->assertEquals('

Hey bro, click here
:)

', $div->outerHtml); + } + + public function testLoadWithFile() + { + $dom = Dom::loadFromFile('tests/data/files/small.html'); + $this->assertEquals('VonBurgermeister', $dom->find('.post-user font', 0)->text); + } + + public function testLoadFromFile() + { + $dom = Dom::loadFromFile('tests/data/files/small.html'); + $this->assertEquals('VonBurgermeister', $dom->find('.post-user font', 0)->text); + } + + /** + * @expectedException \PHPHtmlParser\Exceptions\NotLoadedException + */ + public function testFindNoloadStr() + { + Dom::find('.post-user font', 0); + } + + public function testFindI() + { + Dom::loadFromFile('tests/data/files/big.html'); + $this->assertEquals('В кустах блестит металл
И искрится ток
Человечеству конец', Dom::find('i')[1]->innerHtml); + } + + public function testLoadFromUrl() + { + $streamMock = Mockery::mock(\Psr\Http\Message\StreamInterface::class); + $streamMock->shouldReceive('getContents') + ->once() + ->andReturn(\file_get_contents('tests/data/files/small.html')); + $responseMock = Mockery::mock(\Psr\Http\Message\ResponseInterface::class); + $responseMock->shouldReceive('getBody') + ->once() + ->andReturn($streamMock); + $clientMock = Mockery::mock(\Psr\Http\Client\ClientInterface::class); + $clientMock->shouldReceive('sendRequest') + ->once() + ->andReturn($responseMock); -class StaticDomTest extends PHPUnit_Framework_TestCase { - - public function setUp() - { - StaticDom::mount(); - } - - public function tearDown() - { - StaticDom::unload(); - } - - public function testMountWithDom() - { - $dom = new PHPHtmlParser\Dom; - StaticDom::unload(); - $status = StaticDom::mount('newDom', $dom); - $this->assertTrue($status); - } - - public function testLoad() - { - $dom = Dom::load('

Hey bro, click here
:)

'); - $div = $dom->find('div', 0); - $this->assertEquals('

Hey bro, click here
:)

', $div->outerHtml); - } - - public function testLoadWithFile() - { - $dom = Dom::load('tests/files/small.html'); - $this->assertEquals('VonBurgermeister', $dom->find('.post-user font', 0)->text); - } - - public function testFind() - { - Dom::load('tests/files/horrible.html'); - $this->assertEquals('', Dom::find('table input', 1)->outerHtml); - } - - /** - * @expectedException PHPHtmlParser\Exceptions\NotLoadedException - */ - public function testFindNoLoad() - { - Dom::find('.post-user font', 0); - } - - public function testFindI() - { - Dom::load('tests/files/horrible.html'); - $this->assertEquals('[ Досие бр:12928 ]', Dom::find('i')[0]->innerHtml); - } + Dom::loadFromUrl('http://google.com', null, $clientMock); + $this->assertEquals('VonBurgermeister', Dom::find('.post-row div .post-user font', 0)->text); + } } diff --git a/tests/data/MockNode.php b/tests/data/MockNode.php new file mode 100755 index 00000000..15ca870b --- /dev/null +++ b/tests/data/MockNode.php @@ -0,0 +1,60 @@ +innerHtml = null; + $this->outerHtml = null; + $this->text = null; + if (\is_null($this->parent) === false) { + $this->parent->clear(); + } + } + + /** + * Returns all children of this html node. 
+ */ + protected function getIteratorArray(): array + { + return $this->getChildren(); + } +} diff --git a/tests/data/files/51children.html b/tests/data/files/51children.html new file mode 100755 index 00000000..cc51f0d3 --- /dev/null +++ b/tests/data/files/51children.html @@ -0,0 +1,27 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/files/big.html b/tests/data/files/big.html old mode 100644 new mode 100755 similarity index 99% rename from tests/files/big.html rename to tests/data/files/big.html index 64f0f57b..a26f5093 --- a/tests/files/big.html +++ b/tests/data/files/big.html @@ -2,7 +2,7 @@ - + @@ -48,7 +48,9 @@ - + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/files/index.html b/tests/data/files/index.html new file mode 100644 index 00000000..2fa0b83a --- /dev/null +++ b/tests/data/files/index.html @@ -0,0 +1,1115 @@ + + + + + + + + + + + + + + + + + + + + An Introduction to Custom Fields – WordPress.tv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ + +

An Introduction to Custom Fields

+ +
+ +
+ +
+
+ +
+
+
+
+

+ 16 responses on “An Introduction to Custom Fields

+ +
    +
  1. + + Alex (Viper007Bond) + +
    + + + + + + +
    +
  2. + +
  3. + + Consciência Planetária + +
    + + + + + +
    + +

    Nice explanation!

    +

    I always wondered how these thumbnails are added to magazine layout themes and I had no idea! Indeed I was more interested on the PHP code of how making it work 😛

    +

    It seems that with custom fields we can do pratically any customization…
    +But I have 2 questions

    +

    Are custom fields limited to varchar values, or can we use any kind of data there? Does it have any size limit?

    +

    Ans what about these themes that have a stick post with a bigger image? Does it have 2 custom fields 1 for thumnail and 1 for stick image?

    +

    tnx again!

    +

    Like

    +
    +
    +
  4. + +
  5. + + Vid + +
    + + + + + +
    + +

    Hi, Scott

    +

    This is very helpful for those of us who aren’t programmers but want to maximize WordPress. Thanks for taking the time to submit this tip.

    +

    Like

    +
    +
    +
  6. + +
  7. + + Thomas Clausen + +
    + + + + + +
    + +

    Justin Tadlocks Get the Image plugin can also help you to solve this task (also without CSS stuff in the php file 😉 ).

    +

    Like

    +
    +
    +
  8. + +
  9. + + driz + +
    + + + + + +
    + +

    I would like to see a follow-up video explaining about that Custom Field Template that you told us to ignore. I know it’s created by coding in some WP hooks in functions.php as I have done it myself, but it would be cool to see your interpretation of doing this, might pick up some additional tips.

    +

    Like

    +
    +
    +
  10. + +
  11. + + Scott Ellis + +
    + + + + + +
    + +

    Thomas, that is a good plugin, Justin does great stuff but custom fileds can be used for a lot of things, images were just an easy example, hopefully viewers will find other creative uses. Not sure what you mean about all the css in the php?

    +

    Like

    +
    +
    +
  12. +
      +
    • + + Thomas Clausen + +
      + + + + + +
      + +

      I was just wondering about the styling, that you’ve got about 3 minutes into the video.

      +

      But I didn’t mean to take our eyes of off things. You’re absolutely right the custom fields can be a powerful tool. And I hadn’t really grasped how easy it is, before I saw your video. Good job.

      +

      Like

      +
      +
      +
    • + +
    + +
  13. + + Karen + +
    + + + + + +
    + +

    This is very hard to follow. I admit, I’m not a newbie, but I am not terribly advanced, so I’m not sure where the problem is. One, I think you’re moving too fast. Two, when exactly should you first see the picture appear in the custom field? After you copy the url to the value field and update? or after you go into the home.php and change the code? That is completely unclear. This is such a great topic, so I hope if I post a few questions, it will become clear how to do this. I really do appreciate that you’ve posted this. Thanks!

    +

    Like

    +
    +
    +
  14. + +
  15. + + Karen + +
    + + + + + +
    + +

    also, my home.php doesn’t have the code you show. I am working in the theme Constructor. below is all the text in the home.php file:

    +

    Like

    +
    +
    +
  16. + +
  17. + + Karen + +
    + + + + + +
    + +

    Woopsie & sorry. below is the code in my file:

    +

    Like

    +
    +
    +
  18. + +
  19. + + Scott Ellis + +
    + + + + + +
    + +

    Consciência, you could use two custom fields for that if you wanted to. It would work just fine. I’ve read about some potential performance issues if you use a lot of custom fields but haven’t experienced it myself. We use several on citycrush.com including for the post thumbnail and the image in the post after you click through. The custom field type in the DB is “longtext” maximum size 4GB.

    +

    Driz – We used a plugin for that previously but moved to adding it to the functions.php and using wp hooks so it sounds like you are doing exactly what we would.

    +

    Thomas, most of what was in there was actual logic or just spitting out the html, not css styling, we keep all of that in the stylesheet. Glad you enjoyed the video.

    +

    Karen, sorry it felt fast, if you look at an example and watch the video I’m sure you’ll pick it up quickly. It took me a couple of rounds the first time I started playing with custom fields. Justin Tadlock has a good explanation here as well: http://justintadlock.com/archives/2007/10/24/using-wordpress-custom-fields-introduction.
    +The pictures will appear on the page where your custom field spits them out once you save the image url in the appropriate custom field. FYI, you code didn’t show up so visit http://www.vsellis.com/wordpress-how-to/using-custom-fields-in-wordpress/ and leave a comment and I’ll take a closer look.

    +

    Like

    +
    +
    +
  20. + +
  21. + + Consciência Planetária + +
    + + + + + +
    + +

    Thanks for the reply!

    +

    I’d like to suggest a subject for a future tutorial.

    +

    I love Drupal’s ability to use blocks above and below main content area. It is much easier to implement than WordPress widgets.

    +

    But I know it can be done in WordPress too. I’ve seen some magazine themes that have a “horizontal sidebar” on the botton of the page, and recently I’ve also seen a premium theme that has a “top horizontal sidebar” and a “bottom horizontal sidebar”, together with standard right and left ones.

    +

    It would be great if we had a tutorial teaching how to do it!

    +

    Like

    +
    +
    +
  22. + +
  23. + + PNaw10 + +
    + + + + + +
    + +

    Hello all, just wanted to add one extra tidbit of info.

    +

    The first time you use custom fields, the “name” field is blank, so yes, you would be typing in “thumbnail” as seen in the video. But after your very first use, the “name” field will appear as a pulldown menu which displays ALL previously-used names. So you really don’t have to worry about typing it the right way every single time — just as long as you get it right the first time, you can just select it from the menu. Much faster, and it ensures you’re spelling it the same way every time.

    +

    I realize everyone will discover this on their own as they try it, but thought I’d mention it in case anyone was daunted by the prospect of having to be extra-careful about typing out those case-sensitive field names every time.

    +

    Case-sensitive is definitely important though… for one website I run, cnyradio.com , I originally used Tadlock’s “Newspaperize” theme, which used the custom keywords “thumbnail” and “image.” Later, I upgraded to a newer theme of his, but the theme was designed to seek out “Thumbnail” and “Image” with capital letters at the beginning. Rather than go through all my old posts to change the custom keywords (would have taken forever) I just changed the uppercase letters to lowercase in the theme templates.

    +

    If you want a good example of how different custom fields can help with your site design, check out cnyradio.com. It’s not as complex as the site shown in the video, but it’s (hopefully) still simple enough for newbies (like I was just 2 years ago) to understand.

    +

    My “loop” pages (home page, category pages, etc.) show 128×96 images invoked by the “thumbnail” custom field. When you click to read the full text of any post, a larger 200×150 image appears, invoked by the “image” custom field. If either field is blank or missing, then the site simply doesn’t display an image — the text takes up the entire width of the space.

    +

    Yes, it’s more work because I have to create 2 custom fields for each post, and I create 2 separate images. I do the latter for two main reasons. One, I don’t like relying on web browsers to resize images on-the-fly. Even if it looks OK on my computer, it may appear choppy on someone else’s.

    +

    Two, and more importantly, an image at 200×150 doesn’t always look so good when you simply resize it to 128×96. For example, the “fullsize” version of any mugshots I use will often include the subject’s name and a “courtesy line” to credit the photo source. But that text would be cluttered and tiny when the size is reduced, so when I make the thumbnail, I usually delete the “courtesy” line and bump up the text size of the person’s last name so it’s less cluttered and easier to read.

    +

    If anyone reading this does look at my site to see what I’m talking about, just a note that any “Picture of the Week” posts are done entirely differently. I won’t get into details, just wanted to avoid any confusion.

    +

    Like

    +
    +
    +
  24. + +
  25. + + Sarfraz Ahmed + +
    + + + + + +
    + +

    can we add custom fields to wordpress.com blogs?

    +

    Like

    +
    +
    +
  26. + + +
  27. + + votar fotos + +
    + + + + + +
    + +

    I guess never say never, huh?

    +

    Like

    +
    +
    +
  28. + +
+
+

Continue the discussion

+ + +
+ +
+
+ +
+
+

Fill in your details below or click an icon to log in:

+ +
+ +
+
+
+ Gravatar +
+ +
+ +
+ +
+
+
+ +
+
+
+ +
+
+ +
+
+
+ WordPress.com Logo +
+ +
+ + + +

+ + You are commenting using your WordPress.com account. + ( Log Out /  + Change ) + + +

+
+ +
+
+ +
+
+
+ Google photo +
+ +
+ + + +

+ + You are commenting using your Google account. + ( Log Out /  + Change ) + + +

+
+ +
+
+ +
+
+
+ Twitter picture +
+ +
+ + + +

+ + You are commenting using your Twitter account. + ( Log Out /  + Change ) + + +

+
+ +
+
+ +
+
+
+ Facebook photo +
+ +
+ + + +

+ + You are commenting using your Facebook account. + ( Log Out /  + Change ) + + +

+
+ +
+
+ + +
+ +

Connecting to %s

+
+ +
+ + + +
+

+ + + + +

+ +

+ +

+
+
+
+ +
+
Published
+

August 29, 2009

+ +

Using custom fields can be confusing to new WordPress users. Scott Ellis provides an introductory explanation of how to use custom fields for image placement and the components that go into making custom fields work from front end placement to back end utilization and code.

+

Rate this:

+
Speakers

Scott Ellis 3

Tags

Custom Fields 23

Language

English 8849

Download
+
+MP4: Low, Med
OGG: Low
+
Subtitles
Subtitle this video → +
Producer
+ + + +
+
+ + +
+ + + + + + + + +
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + + + + + + + + + + + + + +
    + + + + + + diff --git a/tests/data/files/mvEregiReplaceFailure.html b/tests/data/files/mvEregiReplaceFailure.html new file mode 100644 index 00000000..d9a559d4 --- /dev/null +++ b/tests/data/files/mvEregiReplaceFailure.html @@ -0,0 +1,1117 @@ + + + + + + + + + + + + + + + + + + + + An Introduction to Custom Fields – WordPress.tv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    + +
    +
    + + +

    An Introduction to Custom Fields

    + +
    + +
    + +
    +
    + +
    +
    +
    +
    +

    + 16 responses on “An Introduction to Custom Fields

    + +
      +
    1. + + Alex (Viper007Bond) + +
      + + + + + + +
      +
    2. + +
    3. + + Consciência Planetária + +
      + + + + + +
      + +

      Nice explanation!

      +

      I always wondered how these thumbnails are added to magazine layout themes and I had no idea! Indeed I was more interested on the PHP code of how making it work 😛

      +

      It seems that with custom fields we can do pratically any customization…
      +But I have 2 questions

      +

      Are custom fields limited to varchar values, or can we use any kind of data there? Does it have any size limit?

      +

      Ans what about these themes that have a stick post with a bigger image? Does it have 2 custom fields 1 for thumnail and 1 for stick image?

      +

      tnx again!

      +

      Like

      +
      +
      +
    4. + +
    5. + + Vid + +
      + + + + + +
      + +

      Hi, Scott

      +

      This is very helpful for those of us who aren’t programmers but want to maximize WordPress. Thanks for taking the time to submit this tip.

      +

      Like

      +
      +
      +
    6. + +
    7. + + Thomas Clausen + +
      + + + + + +
      + +

      Justin Tadlocks Get the Image plugin can also help you to solve this task (also without CSS stuff in the php file 😉 ).

      +

      Like

      +
      +
      +
    8. + +
    9. + + driz + +
      + + + + + +
      + +

      I would like to see a follow-up video explaining about that Custom Field Template that you told us to ignore. I know it’s created by coding in some WP hooks in functions.php as I have done it myself, but it would be cool to see your interpretation of doing this, might pick up some additional tips.

      +

      Like

      +
      +
      +
    10. + +
    11. + + Scott Ellis + +
      + + + + + +
      + +

      Thomas, that is a good plugin, Justin does great stuff but custom fileds can be used for a lot of things, images were just an easy example, hopefully viewers will find other creative uses. Not sure what you mean about all the css in the php?

      +

      Like

      +
      +
      +
    12. +
        +
      • + + Thomas Clausen + +
        + + + + + +
        + +

        I was just wondering about the styling, that you’ve got about 3 minutes into the video.

        +

        But I didn’t mean to take our eyes of off things. You’re absolutely right the custom fields can be a powerful tool. And I hadn’t really grasped how easy it is, before I saw your video. Good job.

        +

        Like

        +
        +
        +
      • + +
      + +
    13. + + Karen + +
      + + + + + +
      + +

      This is very hard to follow. I admit, I’m not a newbie, but I am not terribly advanced, so I’m not sure where the problem is. One, I think you’re moving too fast. Two, when exactly should you first see the picture appear in the custom field? After you copy the url to the value field and update? or after you go into the home.php and change the code? That is completely unclear. This is such a great topic, so I hope if I post a few questions, it will become clear how to do this. I really do appreciate that you’ve posted this. Thanks!

      +

      Like

      +
      +
      +
    14. + +
    15. + + Karen + +
      + + + + + +
      + +

      also, my home.php doesn’t have the code you show. I am working in the theme Constructor. below is all the text in the home.php file:

      +

      Like

      +
      +
      +
    16. + +
    17. + + Karen + +
      + + + + + +
      + +

      Woopsie & sorry. below is the code in my file:

      +

      Like

      +
      +
      +
    18. + +
    19. + + Scott Ellis + +
      + + + + + +
      + +

      Consciência, you could use two custom fields for that if you wanted to. It would work just fine. I’ve read about some potential performance issues if you use a lot of custom fields but haven’t experienced it myself. We use several on citycrush.com including for the post thumbnail and the image in the post after you click through. The custom field type in the DB is “longtext” maximum size 4GB.

      +

      Driz – We used a plugin for that previously but moved to adding it to the functions.php and using wp hooks so it sounds like you are doing exactly what we would.

      +

      Thomas, most of what was in there was actual logic or just spitting out the html, not css styling, we keep all of that in the stylesheet. Glad you enjoyed the video.

      +

      Karen, sorry it felt fast, if you look at an example and watch the video I’m sure you’ll pick it up quickly. It took me a couple of rounds the first time I started playing with custom fields. Justin Tadlock has a good explanation here as well: http://justintadlock.com/archives/2007/10/24/using-wordpress-custom-fields-introduction.
      +The pictures will appear on the page where your custom field spits them out once you save the image url in the appropriate custom field. FYI, you code didn’t show up so visit http://www.vsellis.com/wordpress-how-to/using-custom-fields-in-wordpress/ and leave a comment and I’ll take a closer look.

      +

      Like

      +
      +
      +
    20. + +
    21. + + Consciência Planetária + +
      + + + + + +
      + +

      Thanks for the reply!

      +

      I’d like to suggest a subject for a future tutorial.

      +

      I love Drupal’s ability to use blocks above and below main content area. It is much easier to implement than WordPress widgets.

      +

      But I know it can be done in WordPress too. I’ve seen some magazine themes that have a “horizontal sidebar” on the botton of the page, and recently I’ve also seen a premium theme that has a “top horizontal sidebar” and a “bottom horizontal sidebar”, together with standard right and left ones.

      +

      It would be great if we had a tutorial teaching how to do it!

      +

      Like

      +
      +
      +
    22. + +
    23. + + PNaw10 + +
      + + + + + +
      + +

      Hello all, just wanted to add one extra tidbit of info.

      +

      The first time you use custom fields, the “name” field is blank, so yes, you would be typing in “thumbnail” as seen in the video. But after your very first use, the “name” field will appear as a pulldown menu which displays ALL previously-used names. So you really don’t have to worry about typing it the right way every single time — just as long as you get it right the first time, you can just select it from the menu. Much faster, and it ensures you’re spelling it the same way every time.

      +

      I realize everyone will discover this on their own as they try it, but thought I’d mention it in case anyone was daunted by the prospect of having to be extra-careful about typing out those case-sensitive field names every time.

      +

      Case-sensitive is definitely important though… for one website I run, cnyradio.com , I originally used Tadlock’s “Newspaperize” theme, which used the custom keywords “thumbnail” and “image.” Later, I upgraded to a newer theme of his, but the theme was designed to seek out “Thumbnail” and “Image” with capital letters at the beginning. Rather than go through all my old posts to change the custom keywords (would have taken forever) I just changed the uppercase letters to lowercase in the theme templates.

      +

      If you want a good example of how different custom fields can help with your site design, check out cnyradio.com. It’s not as complex as the site shown in the video, but it’s (hopefully) still simple enough for newbies (like I was just 2 years ago) to understand.

      +

      My “loop” pages (home page, category pages, etc.) show 128×96 images invoked by the “thumbnail” custom field. When you click to read the full text of any post, a larger 200×150 image appears, invoked by the “image” custom field. If either field is blank or missing, then the site simply doesn’t display an image — the text takes up the entire width of the space.

      +

      Yes, it’s more work because I have to create 2 custom fields for each post, and I create 2 separate images. I do the latter for two main reasons. One, I don’t like relying on web browsers to resize images on-the-fly. Even if it looks OK on my computer, it may appear choppy on someone else’s.

      +

      Two, and more importantly, an image at 200×150 doesn’t always look so good when you simply resize it to 128×96. For example, the “fullsize” version of any mugshots I use will often include the subject’s name and a “courtesy line” to credit the photo source. But that text would be cluttered and tiny when the size is reduced, so when I make the thumbnail, I usually delete the “courtesy” line and bump up the text size of the person’s last name so it’s less cluttered and easier to read.

      +

      If anyone reading this does look at my site to see what I’m talking about, just a note that any “Picture of the Week” posts are done entirely differently. I won’t get into details, just wanted to avoid any confusion.

      +

      Like

      +
      +
      +
    24. + +
    25. + + Sarfraz Ahmed + +
      + + + + + +
      + +

      can we add custom fields to wordpress.com blogs?

      +

      Like

      +
      +
      +
    26. + + +
    27. + + votar fotos + +
      + + + + + +
      + +

      I guess never say never, huh?

      +

      Like

      +
      +
      +
    28. + +
    +
    +

    Continue the discussion

    + + +
    + +
    +
    + +
    +
    +

    Fill in your details below or click an icon to log in:

    + +
    + +
    +
    +
    + Gravatar +
    + +
    + +
    + +
    +
    +
    + +
    +
    +
    + +
    +
    + +
    +
    +
    + WordPress.com Logo +
    + +
    + + + +

    + + You are commenting using your WordPress.com account. + ( Log Out /  + Change ) + + +

    +
    + +
    +
    + +
    +
    +
    + Google photo +
    + +
    + + + +

    + + You are commenting using your Google account. + ( Log Out /  + Change ) + + +

    +
    + +
    +
    + +
    +
    +
    + Twitter picture +
    + +
    + + + +

    + + You are commenting using your Twitter account. + ( Log Out /  + Change ) + + +

    +
    + +
    +
    + +
    +
    +
    + Facebook photo +
    + +
    + + + +

    + + You are commenting using your Facebook account. + ( Log Out /  + Change ) + + +

    +
    + +
    +
    + + +
    + +

    Connecting to %s

    +
    + +
    + + + +
    +

    + + + + +

    + +

    + +

    +
    +
    +
    + +
    +
    Published
    +

    August 29, 2009

    + +

    Using custom fields can be confusing to new WordPress users. Scott Ellis provides an introductory explanation of how to use custom fields for image placement and the components that go into making custom fields work from front end placement to back end utilization and code.

    +

    Rate this:

    +
    Speakers

    Scott Ellis 3

    Tags

    Custom Fields 23

    Language

    English 8849

    Download
    +
    +MP4: Low, Med
    OGG: Low
    +
    Subtitles
    Subtitle this video → +
    Producer
    + + + +
    +
    + + +
    + + + + + + + + +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    +
    + + + + + + + + + + + + + + +
      + + + + + + diff --git a/tests/files/small.html b/tests/data/files/small.html old mode 100644 new mode 100755 similarity index 100% rename from tests/files/small.html rename to tests/data/files/small.html diff --git a/tests/data/files/whitespace.html b/tests/data/files/whitespace.html new file mode 100755 index 00000000..b2603edc --- /dev/null +++ b/tests/data/files/whitespace.html @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/tests/files/horrible.html b/tests/files/horrible.html deleted file mode 100644 index c5430be1..00000000 --- a/tests/files/horrible.html +++ /dev/null @@ -1,301 +0,0 @@ - - -МАРнет - - - - - - - - - - -

      - - 0-9 A - B C - D E - F G - H I - J K - L M - N O - P Q - R S - T U - V X Y - W Z - -

      - - - - - - - - -
      - - -
      - -
      - ���������� �� �������: - - -
      -
      -
      - - - - -
      ������ ������������ ������:
      marnet.org.mk
      marnet.edu.mk
      marnet.com.mk
      marnet.mk
      -
      - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
      -
      �������� �� �������: marnet.mk [ ����� ��:12928 ]
      -
      -
      ���� �� ��� � ������� �������������: 22-05-2014
      -
      -
      -

      ���� �� ����������� �� �������:

      -
      -
      22-05-2008
      -
      -

      ����� ��� �� ������������:

      -
      -
      ���������� ��������� ������������ ����� �����
      -
      -

      ������ �� ������������:

      -
      -
      ��. ������� ����������� ������ ��.17 ���ϣ�
      -
      -

      ��� �� ������������:

      -
      -
      4080011519278
      -
      -

      �������� �� ������������:

      -
      -
      02/3256-561
        
      ��������������� �������
      -
      -

      ���:

      -
      -
      ���� �����������
      -
      -

      e-mail:

      -
      -
      domains@marnet.net.mk
      -
      -

      �������:

      -
      -
      //
      �������� �������
      -
      -

      ���:

      -
      -
      ���� �����������
      -
      -

      e-mail:

      -
      -
      domains@marnet.net.mk
      -
      -

      �������:

      -
      -
      //
      ������� �����������
      ���
      IP
      nsg.mio.gov.mk80.77.151.251
      kitka.marnet.net.mk194.149.131.2
      - - - -
      - - - -