Skip to content
This repository was archived by the owner on Jan 10, 2023. It is now read-only.
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Decoupled tokenizer from detection strategy
This opens up possibilities to swap the default token_get_all()-
based tokenizer for another one, or to add tokenizers for other
programming languages.
  • Loading branch information
aboks committed Oct 27, 2013
commit 39f7058e5a67d01b2fe8e36445bccef52328a3ab
9 changes: 7 additions & 2 deletions build/package.xml
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,15 @@
</dir>
<dir name="Detector">
<dir name="Strategy">
<file baseinstalldir="/" name="Abstract.php" role="php" />
<file baseinstalldir="/" name="Default.php" role="php" />
<file baseinstalldir="/" name="Abstract.php" role="php" />
<file baseinstalldir="/" name="Default.php" role="php" />
</dir>
<dir name="Tokenizer">
<file baseinstalldir="/" name="PHP.php" role="php" />
<file baseinstalldir="/" name="Result.php" role="php" />
</dir>
<file baseinstalldir="/" name="Detector.php" role="php" />
<file baseinstalldir="/" name="Tokenizer.php" role="php" />
</dir>
<dir name="Log">
<file baseinstalldir="/" name="AbstractXmlLogger.php" role="php" />
Expand Down
8 changes: 5 additions & 3 deletions src/CLI/Command.php
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@

use SebastianBergmann\PHPCPD\Detector\Detector;
use SebastianBergmann\PHPCPD\Detector\Strategy\DefaultStrategy;
use SebastianBergmann\PHPCPD\Detector\Tokenizer\PHP;
use SebastianBergmann\PHPCPD\Log\PMD;
use SebastianBergmann\PHPCPD\Log\Text;
use SebastianBergmann\FinderFacade\FinderFacade;
Expand Down Expand Up @@ -162,9 +163,10 @@ protected function execute(InputInterface $input, OutputInterface $output)
$progressHelper->start($output, count($files));
}

$strategy = new DefaultStrategy;
$detector = new Detector($strategy, $progressHelper);
$quiet = $output->getVerbosity() == OutputInterface::VERBOSITY_QUIET;
$tokenizer = new PHP;
$strategy = new DefaultStrategy($tokenizer);
$detector = new Detector($strategy, $progressHelper);
$quiet = $output->getVerbosity() == OutputInterface::VERBOSITY_QUIET;

$clones = $detector->copyPasteDetection(
$files,
Expand Down
26 changes: 22 additions & 4 deletions src/Detector/Strategy/Default.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
use SebastianBergmann\PHPCPD\CodeClone;
use SebastianBergmann\PHPCPD\CodeCloneFile;
use SebastianBergmann\PHPCPD\CodeCloneMap;
use SebastianBergmann\PHPCPD\Detector\Tokenizer;

/**
* Default strategy for detecting code clones.
Expand All @@ -59,6 +60,22 @@
*/
class DefaultStrategy extends AbstractStrategy
{
/**
* @var Tokenizer
*/
private $tokenizer;

/**
 * Constructor.
 *
 * Stores the tokenizer on the strategy; processFile() later calls
 * tokenizeFile() on it to obtain the tokens and the line count of
 * each file it processes.
 *
 * @param Tokenizer $tokenizer Tokenizer used to read and tokenize files.
 * @since Method available since Release 2.0.0
 */
public function __construct(Tokenizer $tokenizer)
{
$this->tokenizer = $tokenizer;
}

/**
* Copy & Paste Detection (CPD).
*
Expand All @@ -71,19 +88,20 @@ class DefaultStrategy extends AbstractStrategy
*/
public function processFile($file, $minLines, $minTokens, CodeCloneMap $result, $fuzzy = false)
{
$buffer = file_get_contents($file);
$tokenizerResult = $this->tokenizer->tokenizeFile($file);

$currentTokenPositions = array();
$currentTokenRealPositions = array();
$currentSignature = '';
$tokens = token_get_all($buffer);
$tokens = $tokenizerResult->getTokens();
$tokenNr = 0;
$lastTokenLine = 0;

$result->setNumLines(
$result->getNumLines() + substr_count($buffer, "\n")
$result->getNumLines() + $tokenizerResult->getNumberOfLines()
);

unset($buffer);
unset($tokenizerResult);

foreach (array_keys($tokens) as $key) {
$token = $tokens[$key];
Expand Down
68 changes: 68 additions & 0 deletions src/Detector/Tokenizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
<?php
/**
* phpcpd
*
* Copyright (c) 2009-2013, Sebastian Bergmann <sebastian@phpunit.de>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* * Neither the name of Sebastian Bergmann nor the names of his
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package phpcpd
* @author Sebastian Bergmann <sebastian@phpunit.de>
* @copyright 2009-2013 Sebastian Bergmann <sebastian@phpunit.de>
* @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
* @since File available since Release 2.0.0
*/

namespace SebastianBergmann\PHPCPD\Detector;

use SebastianBergmann\PHPCPD\Detector\Tokenizer\Result;

/**
* Interface for a tokenizer, which reads a file and splits it
* into lexical tokens.
*
* @author Johann-Peter Hartmann <[email protected]>
* @author Sebastian Bergmann <[email protected]>
* @copyright 2009-2013 Sebastian Bergmann <[email protected]>
* @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
* @link http://github.com/sebastianbergmann/phpcpd/tree
* @since Class available since Release 2.0.0
*/
interface Tokenizer
{
/**
 * Tokenizes a file.
 *
 * Implementations read the given file and return its lexical tokens,
 * together with the file's line count, wrapped in a Result object.
 *
 * @param string $file File to tokenize (presumably a filesystem path;
 *                     the PHP implementation passes it straight to
 *                     file_get_contents()).
 * @return Result
 */
public function tokenizeFile($file);
}
74 changes: 74 additions & 0 deletions src/Detector/Tokenizer/PHP.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
<?php
/**
* phpcpd
*
* Copyright (c) 2009-2013, Sebastian Bergmann <sebastian@phpunit.de>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* * Neither the name of Sebastian Bergmann nor the names of his
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package phpcpd
* @author Sebastian Bergmann <sebastian@phpunit.de>
* @copyright 2009-2013 Sebastian Bergmann <sebastian@phpunit.de>
* @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
* @since File available since Release 2.0.0
*/

namespace SebastianBergmann\PHPCPD\Detector\Tokenizer;

use SebastianBergmann\PHPCPD\Detector\Tokenizer;

/**
* A tokenizer for PHP source files.
*
* @author Johann-Peter Hartmann <[email protected]>
* @author Sebastian Bergmann <[email protected]>
* @copyright 2009-2013 Sebastian Bergmann <[email protected]>
* @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
* @link http://github.com/sebastianbergmann/phpcpd/tree
* @since Class available since Release 2.0.0
*/
class PHP implements Tokenizer
{
    /**
     * Reads a PHP source file and packages its tokens into a Result.
     *
     * The token list is produced by token_get_all(); the reported line
     * count is the number of "\n" characters found in the file contents.
     *
     * NOTE(review): file_get_contents() returns false when the file cannot
     * be read; that value is handed on unchanged to token_get_all() and
     * substr_count(), exactly as before.
     *
     * @param  string $file
     * @return Result
     */
    public function tokenizeFile($file)
    {
        $source = file_get_contents($file);

        return new Result(
            token_get_all($source),
            substr_count($source, "\n")
        );
    }
}
101 changes: 101 additions & 0 deletions src/Detector/Tokenizer/Result.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
<?php
/**
* phpcpd
*
* Copyright (c) 2009-2013, Sebastian Bergmann <sebastian@phpunit.de>.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* * Neither the name of Sebastian Bergmann nor the names of his
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package phpcpd
* @author Sebastian Bergmann <sebastian@phpunit.de>
* @copyright 2009-2013 Sebastian Bergmann <sebastian@phpunit.de>
* @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
* @since File available since Release 2.0.0
*/

namespace SebastianBergmann\PHPCPD\Detector\Tokenizer;

/**
* Value object that represents the output of tokenizing a file.
*
* @author Johann-Peter Hartmann <[email protected]>
* @author Sebastian Bergmann <[email protected]>
* @copyright 2009-2013 Sebastian Bergmann <[email protected]>
* @license http://www.opensource.org/licenses/BSD-3-Clause The BSD 3-Clause License
* @link http://github.com/sebastianbergmann/phpcpd/tree
* @since Class available since Release 2.0.0
*/
class Result
{
    /**
     * Tokens read from the file, shaped like token_get_all() output.
     *
     * @var array
     */
    private $tokens;

    /**
     * Line count reported for the file.
     *
     * @var integer
     */
    private $numberOfLines;

    /**
     * Captures the outcome of tokenizing one file.
     *
     * Both values are stored as given; no validation or conversion
     * is performed.
     *
     * @param array   $tokens        Tokens in the format token_get_all()
     *                               returns them.
     * @param integer $numberOfLines Number of lines in the file that was read.
     */
    public function __construct(array $tokens, $numberOfLines)
    {
        $this->numberOfLines = $numberOfLines;
        $this->tokens        = $tokens;
    }

    /**
     * Gives back the line count passed to the constructor.
     *
     * @return integer
     */
    public function getNumberOfLines()
    {
        return $this->numberOfLines;
    }

    /**
     * Gives back the token array passed to the constructor.
     *
     * @return array Tokens in the format token_get_all() returns them.
     */
    public function getTokens()
    {
        return $this->tokens;
    }
}
3 changes: 3 additions & 0 deletions src/autoload.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,9 @@ function ($class) {
'sebastianbergmann\\phpcpd\\detector\\detector' => '/Detector/Detector.php',
'sebastianbergmann\\phpcpd\\detector\\strategy\\abstractstrategy' => '/Detector/Strategy/Abstract.php',
'sebastianbergmann\\phpcpd\\detector\\strategy\\defaultstrategy' => '/Detector/Strategy/Default.php',
'sebastianbergmann\\phpcpd\\detector\\tokenizer' => '/Detector/Tokenizer.php',
'sebastianbergmann\\phpcpd\\detector\\tokenizer\\php' => '/Detector/Tokenizer/PHP.php',
'sebastianbergmann\\phpcpd\\detector\\tokenizer\\result' => '/Detector/Tokenizer/Result.php',
'sebastianbergmann\\phpcpd\\log\\abstractxmllogger' => '/Log/AbstractXmlLogger.php',
'sebastianbergmann\\phpcpd\\log\\pmd' => '/Log/PMD.php',
'sebastianbergmann\\phpcpd\\log\\text' => '/Log/Text.php'
Expand Down
Loading