diff --git a/ModelCrawler/pom.xml b/ModelCrawler/pom.xml index ea81e89..f56be4f 100644 --- a/ModelCrawler/pom.xml +++ b/ModelCrawler/pom.xml @@ -4,7 +4,7 @@ de.unirostock.sems ModelCrawler - 0.0.3-SNAPSHOT + 0.0.4 jar ModelCrawler @@ -25,6 +25,18 @@ + + + de.binfalse + BFUtils + 0.4 + + + + org.eclipse.jgit + org.eclipse.jgit + 3.7.0.201502260915-r + junit @@ -83,16 +95,11 @@ commons-io 2.4 - - com.aragost.javahg - javahg - 0.6 - de.unirostock.sems BiVeS - 1.2.5 + 1.3.11 de.unirostock.sems diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/App.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/App.java index 12ff6cf..3b751c3 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/App.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/App.java @@ -28,20 +28,33 @@ import de.unirostock.sems.morre.client.exception.MorreException; import de.unirostock.sems.morre.client.impl.HttpMorreClient; +// TODO: Auto-generated Javadoc /** - * Hello world! - * + * Hello world!. */ public class App { + + /** The Constant log. */ private static final Log log = LogFactory.getLog( App.class ); + /** The morre client. */ private static MorreCrawlerInterface morreClient; + + /** The bio models db. */ private static ModelDatabase bioModelsDb; + + /** The pmr2 db. */ private static ModelDatabase pmr2Db; + /** The xml file server. */ private static XmlFileServer xmlFileServer = null; + /** + * The main method. + * + * @param args the arguments + */ public static void main( String[] args ) { log.info("ModelCrawler startet"); @@ -77,7 +90,7 @@ public static void main( String[] args ) { // XXX Limiter! // int n = 1; // limiter - // going throw all changeSets ... + // going through all changeSets ... Iterator changesSetIterator = changes.values().iterator(); while( changesSetIterator.hasNext() ) { // ... 
and process them @@ -94,6 +107,9 @@ public static void main( String[] args ) { log.info("finished crawling"); } + /** + * Prepare. + */ private static void prepare() { if( log.isInfoEnabled() ) @@ -109,6 +125,9 @@ private static void prepare() { Properties.checkAndInitWorkingDir(); } + /** + * Inits the connectors. + */ private static void initConnectors() { if( log.isInfoEnabled() ) @@ -152,6 +171,9 @@ private static void initConnectors() { } + /** + * Clean up. + */ private static void cleanUp() { log.info("Cleans everything up!"); @@ -162,6 +184,11 @@ private static void cleanUp() { pmr2Db.cleanUp(); } + /** + * Process change set. + * + * @param changeSet the change set + */ private static void processChangeSet( ChangeSet changeSet ) { //XXX some sort of filter diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/Properties.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/Properties.java index e333ac7..f1081e4 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/Properties.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/Properties.java @@ -6,19 +6,26 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import de.unirostock.sems.XmlFileServerClient.XmlFileServer; - +// TODO: Auto-generated Javadoc +/** + * The Class Properties taking care of settings for the model crawler. + */ public class Properties { + /** The prop. */ private static java.util.Properties prop = null; + + /** The working dir. */ private static File workingDir = null; + /** The Constant log. */ private static final Log log = LogFactory.getLog( Properties.class ); + /** The split char to refer to other properties. */ public static final String ELEMENT_SPLITTER = ";"; /** - * Init the Properties System + * Init the Properties System. */ public static void init() { prop = new java.util.Properties(); @@ -39,6 +46,13 @@ public static void init() { } + /** + * Gets a property. 
+ * + * @param key the key + * @param defaultValue the default value + * @return the property + */ public static String getProperty( String key, String defaultValue ) { if( prop != null ) return prop.getProperty(key, defaultValue); @@ -46,6 +60,12 @@ public static String getProperty( String key, String defaultValue ) { return null; } + /** + * Gets a property. + * + * @param key the key + * @return the property + */ public static String getProperty( String key ) { if( prop != null ) return prop.getProperty(key); @@ -53,6 +73,9 @@ public static String getProperty( String key ) { return null; } + /** + * Initialises working dir if not yet done. + */ public static void checkAndInitWorkingDir() { // only once needed... @@ -68,6 +91,11 @@ public static void checkAndInitWorkingDir() { } + /** + * Gets the working dir. + * + * @return the working dir + */ public static File getWorkingDir() { return workingDir; } diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/XmlFileRepository/XmlFileRepository.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/XmlFileRepository/XmlFileRepository.java index 28017f2..dfb9e09 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/XmlFileRepository/XmlFileRepository.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/XmlFileRepository/XmlFileRepository.java @@ -5,15 +5,30 @@ import de.unirostock.sems.ModelCrawler.Properties; +// TODO: Auto-generated Javadoc +/** + * The Class XmlFileRepository. + */ public class XmlFileRepository { + /** The Constant URL_ENCODING. */ public static final String URL_ENCODING = Properties.getProperty("de.unirostock.sems.ModelCrawler.encoding", "UTF-8"); + + /** The Constant URL_PATH_SEPARATOR. 
*/ public static final String URL_PATH_SEPARATOR = Properties.getProperty("de.unirostock.sems.ModelCrawler.pathSeparator", "/"); // private final Log log = LogFactory.getLog( XmlFileRepository.class ); - public static String generateFileId( String repositoryUrl, String fileName ) throws UnsupportedEncodingException { + /** + * Generate file id. + * + * @param repositoryUrl the repository url + * @param fileName the file name + * @return the string + * @throws UnsupportedEncodingException the unsupported encoding exception + */ +public static String generateFileId( String repositoryUrl, String fileName ) throws UnsupportedEncodingException { StringBuilder result = new StringBuilder(repositoryUrl); // if repo Url does not end and the file name does not starts with a slash / diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelRelease.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelRelease.java index ec6ee29..bb993a2 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelRelease.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelRelease.java @@ -5,17 +5,38 @@ import java.util.Map; import java.util.Set; +// TODO: Auto-generated Javadoc +/** + * The Class BioModelRelease. + */ public class BioModelRelease implements Comparable { + /** The release name. */ private String releaseName; + + /** The ftp directory. */ private String ftpDirectory; + + /** The release date. */ private Date releaseDate; + /** The archiv file. */ private File archivFile = null; + + /** The content dir. */ private File contentDir = null; + /** The model map. */ private Map modelMap; + /** + * The Constructor. 
+ * + * @param releaseName the release name + * @param ftpDirectory the ftp directory + * @param releaseDate the release date + * @param archivFile the archiv file + */ public BioModelRelease( String releaseName, String ftpDirectory, Date releaseDate, File archivFile ) { this.releaseName = releaseName; this.ftpDirectory = ftpDirectory; @@ -23,22 +44,52 @@ public BioModelRelease( String releaseName, String ftpDirectory, Date releaseDat this.archivFile = archivFile; } + /** + * The Constructor. + * + * @param releaseName the release name + * @param ftpDirectory the ftp directory + * @param releaseDate the release date + */ public BioModelRelease( String releaseName, String ftpDirectory, Date releaseDate ) { this.releaseName = releaseName; this.ftpDirectory = ftpDirectory; this.releaseDate = releaseDate; } + /** + * Gets the release name. + * + * @return the release name + */ public String getReleaseName() { return releaseName; } + + /** + * Gets the release date. + * + * @return the release date + */ public Date getReleaseDate() { return releaseDate; } + + /** + * Gets the archiv file. + * + * @return the archiv file + */ public File getArchivFile() { return archivFile; } + /** + * Sets the archiv file. + * + * @param archivFile the archiv file + * @return true, if sets the archiv file + */ public boolean setArchivFile(File archivFile) { //REMIND the archiv file could only be setted once! if( this.archivFile == null ) { @@ -49,22 +100,50 @@ public boolean setArchivFile(File archivFile) { return false; } + /** + * Gets the model list. + * + * @return the model list + */ public Set getModelList() { return modelMap.keySet(); } + /** + * Gets the model path. + * + * @param fileId the file id + * @return the model path + */ public File getModelPath( String fileId ) { return modelMap.get(fileId); } + /** + * Gets the ftp directory. + * + * @return the ftp directory + */ public String getFtpDirectory() { return ftpDirectory; } + /** + * Gets the content dir. 
+ * + * @return the content dir + */ public File getContentDir() { return contentDir; } + /** + * Sets the content dir. + * + * @param contentDir the content dir + * @param modelMap the model map + * @return true, if sets the content dir + */ public boolean setContentDir(File contentDir, Map modelMap) { // REMIND the contentDir can only be setted once! if( this.contentDir == null && this.modelMap == null ) { @@ -76,14 +155,27 @@ public boolean setContentDir(File contentDir, Map modelMap) { return false; } + /** + * Checks if is downloaded. + * + * @return true, if checks if is downloaded + */ public boolean isDownloaded() { return archivFile == null ? false : true; } + /** + * Checks if is extracted. + * + * @return true, if checks if is extracted + */ public boolean isExtracted() { return contentDir == null ? false : true; } + /* (non-Javadoc) + * @see java.lang.Comparable#compareTo(java.lang.Object) + */ @Override public int compareTo( BioModelRelease model ) { return releaseDate.compareTo( model.getReleaseDate() ); diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsChange.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsChange.java index af34f3f..8a6bdb1 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsChange.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsChange.java @@ -14,21 +14,46 @@ import de.unirostock.sems.ModelCrawler.databases.Interface.Change; +// TODO: Auto-generated Javadoc +/** + * The Class BioModelsChange. + */ public class BioModelsChange extends Change { + /** The Constant serialVersionUID. */ private static final long serialVersionUID = -8763419545605089673L; + /** The log. */ private final Log log = LogFactory.getLog( BioModelsChange.class ); + /** The Constant HASH_ALGORITHM. 
*/ public final static String HASH_ALGORITHM = "SHA-256"; + + /** The Constant HASH_ALGORITHM_FALLBACK. */ public final static String HASH_ALGORITHM_FALLBACK = "SHA"; + /** The Constant META_HASH. */ public final static String META_HASH = "filehash"; + /** + * The Constructor. + * + * @param fileId the file id + * @param versionId the version id + * @param versionDate the version date + * @param crawledDate the crawled date + */ public BioModelsChange( String fileId, String versionId, Date versionDate, Date crawledDate ) { super(fileId, versionId, versionDate, crawledDate); } + /** + * Sets the xml file. + * + * @param xmlFile the xml file + * @param hash the hash + * @return true, if sets the xml file + */ public boolean setXmlFile(File xmlFile, String hash) { //REMIND the xml file can only be setted once in a Change if( this.xmlFile == null && hash != null ) { @@ -45,19 +70,40 @@ else if( hash == null ) return false; } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.Change#setXmlFile(java.io.File) + */ @Override public boolean setXmlFile( File xmlFile ) { return setXmlFile( xmlFile, calcXmlHash(xmlFile) ); } + /** + * Gets the hash. + * + * @return the hash + */ public String getHash() { return getMeta(META_HASH); } + /** + * Calc xml hash. + * + * @param xmlFile the xml file + * @return the string + */ protected String calcXmlHash( File xmlFile ) { return calcXmlHash( xmlFile, HASH_ALGORITHM ); } + /** + * Calc xml hash. 
+ * + * @param xmlFile the xml file + * @param algo the algo + * @return the string + */ protected String calcXmlHash(File xmlFile, String algo) { String hash = null; diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsChangeSet.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsChangeSet.java index 3977682..0d0c9ee 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsChangeSet.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsChangeSet.java @@ -2,12 +2,26 @@ import de.unirostock.sems.ModelCrawler.databases.Interface.ChangeSet; +// TODO: Auto-generated Javadoc +/** + * The Class BioModelsChangeSet. + */ public class BioModelsChangeSet extends ChangeSet { + /** + * The Constructor. + * + * @param fileId the file id + */ public BioModelsChangeSet(String fileId) { super(fileId); } + /** + * Adds the change. + * + * @param change the change + */ public void addChange( BioModelsChange change ) { super.addChange(change); } diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsDb.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsDb.java index aede9df..4d6474b 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsDb.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/BioModelsDb.java @@ -49,23 +49,49 @@ import de.unirostock.sems.morre.client.exception.MorreCommunicationException; import de.unirostock.sems.morre.client.exception.MorreException; +// TODO: Auto-generated Javadoc +/** + * The Class BioModelsDb. + */ public class BioModelsDb implements ModelDatabase { + /** The log. */ private final Log log = LogFactory.getLog( BioModelsDb.class ); + /** The ftp url. 
*/ private URL ftpUrl; + + /** The ftp client. */ private FTPClient ftpClient; + + /** The release list. */ private List releaseList = new ArrayList(); - protected File workingDir, tempDir; + /** The working dir. */ + protected File workingDir; + + /** The temp dir. */ + protected File tempDir; + + /** The config. */ protected java.util.Properties config; + /** The change set map. */ protected Map changeSetMap = new HashMap(); + /** The morre client. */ protected MorreCrawlerInterface morreClient = null; // protected GraphDatabase graphDb = null; - public BioModelsDb(String ftpUrl, MorreCrawlerInterface morreClient) throws MalformedURLException, IllegalArgumentException { + /** + * The Constructor. + * + * @param ftpUrl the ftp url + * @param morreClient the morre client + * @throws MalformedURLException the malformed url exception + * @throws IllegalArgumentException the illegal argument exception + */ +public BioModelsDb(String ftpUrl, MorreCrawlerInterface morreClient) throws MalformedURLException, IllegalArgumentException { this.ftpUrl = new URL(ftpUrl); // this.graphDb = graphDb; this.morreClient = morreClient; @@ -86,25 +112,44 @@ public BioModelsDb(String ftpUrl, MorreCrawlerInterface morreClient) throws Malf } + /** + * The Constructor.
+ * + * @param morreClient the morre client + * @throws MalformedURLException the malformed url exception + * @throws IllegalArgumentException the illegal argument exception + */ public BioModelsDb( MorreCrawlerInterface morreClient ) throws MalformedURLException, IllegalArgumentException { this( Properties.getProperty("de.unirostock.sems.ModelCrawler.BioModelsDb.ftpUrl"), morreClient ); } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.ModelDatabase#listModels() + */ @Override public List listModels() { return new ArrayList( changeSetMap.keySet() ); } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.ModelDatabase#listChanges() + */ @Override public Map listChanges() { return changeSetMap; } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.ModelDatabase#getModelChanges(java.lang.String) + */ @Override public ChangeSet getModelChanges(String fileId) { return changeSetMap.get(fileId); } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.ModelDatabase#cleanUp() + */ @Override public void cleanUp() { @@ -119,6 +164,9 @@ public void cleanUp() { } } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.ModelDatabase#run() + */ @Override public void run() { List newReleases = new ArrayList(); @@ -188,9 +236,9 @@ public void run() { /** * Downloads, extracts and indexes the gives release - * must called for each new release CHRONOLOGICAL - * - * @param release + * must called for each new release CHRONOLOGICAL. + * + * @param release the release */ protected void processRelease( BioModelRelease release ) { @@ -233,6 +281,9 @@ protected void processRelease( BioModelRelease release ) { } } + /** + * Check and init working dir. 
+ */ protected void checkAndInitWorkingDir() { workingDir = new File( Properties.getWorkingDir(), Properties.getProperty("de.unirostock.sems.ModelCrawler.BioModelsDb.subWorkingDir") ); @@ -270,6 +321,9 @@ protected void checkAndInitWorkingDir() { } + /** + * Save properties. + */ protected void saveProperties() { if( config == null ) { @@ -287,6 +341,13 @@ protected void saveProperties() { } + /** + * Connect. + * + * @throws FtpConnectionException the ftp connection exception + * @throws IOException the IO exception + * @throws SocketException the socket exception + */ protected void connect() throws FtpConnectionException, IOException, SocketException { log.info("connecting to ftp server"); @@ -335,6 +396,9 @@ protected void connect() throws FtpConnectionException, IOException, SocketExcep } + /** + * Disconnect. + */ protected void disconnect() { try { ftpClient.logout(); @@ -344,6 +408,12 @@ protected void disconnect() { } } + /** + * Retrieve release list. + * + * @return the list of bio model releases + * @throws IOException the IO exception + */ protected List retrieveReleaseList() throws IOException { // cleares the list @@ -394,6 +464,13 @@ protected List retrieveReleaseList() throws IOException { return releaseList; } + /** + * Download release. + * + * @param release the release + * @return true, if download release + * @throws UnsupportedCompressionAlgorithmException the unsupported compression algorithm exception + */ private boolean downloadRelease( BioModelRelease release ) throws UnsupportedCompressionAlgorithmException { String archiv; File target; @@ -507,6 +584,12 @@ else if( archiv.endsWith(".tar") ) { return true; } + /** + * Find sbml archiv file. + * + * @return the string + * @throws IOException the IO exception + */ private String findSbmlArchivFile() throws IOException { FTPFile[] files = ftpClient.listFiles(); @@ -525,6 +608,13 @@ private String findSbmlArchivFile() throws IOException { return null; } + /** + * Extract release.
+ * + * @param release the release + * @throws IllegalArgumentException the illegal argument exception + * @throws ExtractException the extract exception + */ private void extractRelease( BioModelRelease release ) throws IllegalArgumentException, ExtractException { // already extracted or not even downloaded - just for safety... @@ -619,6 +709,13 @@ private void extractRelease( BioModelRelease release ) throws IllegalArgumentExc } } + /** + * Tranfer change. + * + * @param fileId the file id + * @param release the release + * @param crawledDate the crawled date + */ private void tranferChange( String fileId, BioModelRelease release, Date crawledDate ) { boolean isChangeNew = false; diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/exceptions/ExtractException.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/exceptions/ExtractException.java index de59767..6fe5661 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/exceptions/ExtractException.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/exceptions/ExtractException.java @@ -1,20 +1,44 @@ package de.unirostock.sems.ModelCrawler.databases.BioModelsDb.exceptions; +// TODO: Auto-generated Javadoc +/** + * The Class ExtractException. + */ public class ExtractException extends Exception { + /** The Constant serialVersionUID. */ private static final long serialVersionUID = -4419252545030334590L; + /** + * The Constructor. + */ public ExtractException() { } + /** + * The Constructor. + * + * @param message the message + */ public ExtractException(String message) { super(message); } + /** + * The Constructor. + * + * @param cause the cause + */ public ExtractException(Throwable cause) { super(cause); } + /** + * The Constructor. 
+ * + * @param message the message + * @param cause the cause + */ public ExtractException(String message, Throwable cause) { super(message, cause); } diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/exceptions/FtpConnectionException.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/exceptions/FtpConnectionException.java index f51a03c..2bff284 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/exceptions/FtpConnectionException.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/BioModelsDb/exceptions/FtpConnectionException.java @@ -1,21 +1,45 @@ package de.unirostock.sems.ModelCrawler.databases.BioModelsDb.exceptions; +// TODO: Auto-generated Javadoc +/** + * The Class FtpConnectionException. + */ public class FtpConnectionException extends Exception { + /** The Constant serialVersionUID. */ private static final long serialVersionUID = -4858702130474478887L; + /** + * The Constructor. + */ public FtpConnectionException() { super(); } + /** + * The Constructor. + * + * @param message the message + */ public FtpConnectionException(String message) { super(message); } + /** + * The Constructor. + * + * @param cause the cause + */ public FtpConnectionException(Throwable cause) { super(cause); } + /** + * The Constructor. 
+ * + * @param message the message + * @param cause the cause + */ public FtpConnectionException(String message, Throwable cause) { super(message, cause); } diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/Change.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/Change.java index fed7bd6..8574332 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/Change.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/Change.java @@ -15,20 +15,49 @@ import de.unirostock.sems.XmlFileServerClient.exceptions.XmlFileServerBadRequestException; import de.unirostock.sems.XmlFileServerClient.exceptions.XmlFileServerProtocollException; +// TODO: Auto-generated Javadoc +/** + * The Class Change. + */ public abstract class Change extends CrawledModelRecord implements Comparable { + /** The Constant serialVersionUID. */ private static final long serialVersionUID = 3880353134783045794L; + + /** The version date. */ private transient Date versionDate = null; + + /** The crawled date. */ private transient Date crawledDate = null; + /** The xml file. */ protected transient File xmlFile = null; + /** + * The Constructor. + * + * @param fileId the file id + * @param versionId the version id + * @param versionDate the version date + * @param crawledDate the crawled date + */ public Change( String fileId, String versionId, Date versionDate, Date crawledDate ) { super(fileId, versionId, versionDate, crawledDate); this.versionDate = versionDate; this.crawledDate = crawledDate; } + /** + * Push to xml file server. 
+ * + * @param server the server + * @throws XmlNotFoundException the xml not found exception + * @throws ModelAlreadyExistsException the model already exists exception + * @throws XmlFileServerBadRequestException the xml file server bad request exception + * @throws UnsupportedUriException the unsupported uri exception + * @throws XmlFileServerProtocollException the xml file server protocoll exception + * @throws IOException the IO exception + */ public void pushToXmlFileServer( XmlFileServer server ) throws XmlNotFoundException, ModelAlreadyExistsException, XmlFileServerBadRequestException, UnsupportedUriException, XmlFileServerProtocollException, IOException { if( xmlFile == null ) @@ -54,10 +83,21 @@ public void pushToXmlFileServer( XmlFileServer server ) throws XmlNotFoundExcept } + /** + * Gets the xml file. + * + * @return the xml file + */ public File getXmlFile() { return xmlFile; } + /** + * Sets the xml file. + * + * @param xmlFile the xml file + * @return true, if sets the xml file + */ public boolean setXmlFile( File xmlFile ) { //REMIND the xml file can only be setted once in a Change if( this.xmlFile == null ) { @@ -68,12 +108,18 @@ public boolean setXmlFile( File xmlFile ) { return false; } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.helper.CrawledModelRecord#setVersionDate(java.util.Date) + */ @Override public void setVersionDate(Date versionDate) { super.setVersionDate(versionDate); this.versionDate = versionDate; } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.helper.CrawledModelRecord#getVersionDate() + */ @Override public Date getVersionDate() { if( versionDate == null ) @@ -82,12 +128,18 @@ public Date getVersionDate() { return versionDate; } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.helper.CrawledModelRecord#setCrawledDate(java.util.Date) + */ @Override public void setCrawledDate(Date crawledDate) { super.setCrawledDate(crawledDate); this.crawledDate = crawledDate; } + /* (non-Javadoc) + * @see 
de.unirostock.sems.ModelCrawler.helper.CrawledModelRecord#getCrawledDate() + */ @Override public Date getCrawledDate() { if( crawledDate == null ) @@ -96,11 +148,17 @@ public Date getCrawledDate() { return crawledDate; } + /* (non-Javadoc) + * @see java.lang.Comparable#compareTo(java.lang.Object) + */ @Override public int compareTo( Change change ) { return getVersionDate().compareTo( change.getVersionDate() ); } + /* (non-Javadoc) + * @see de.unirostock.sems.morre.client.dataholder.CrawledModel#toString() + */ @Override public String toString() { return "Chg:" + getFileId()+"@"+getVersionId(); diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/ChangeSet.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/ChangeSet.java index 938c15f..ca7bb8e 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/ChangeSet.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/ChangeSet.java @@ -4,15 +4,32 @@ import java.util.Set; import java.util.TreeSet; +// TODO: Auto-generated Javadoc +/** + * The Class ChangeSet. + */ public abstract class ChangeSet { + /** The file id. */ protected String fileId; + + /** The changes. */ protected NavigableSet changes; + /** + * Gets the changes. + * + * @return the changes + */ public Set getChanges() { return changes; } + /** + * Gets the latest change. + * + * @return the latest change + */ public Change getLatestChange() { if( changes.size() > 0 ) return changes.last(); @@ -20,20 +37,38 @@ public Change getLatestChange() { return null; } + /** + * Adds the change. + * + * @param change the change + */ public void addChange(Change change) { if( change.getFileId().equals(fileId) ) changes.add(change); } + /** + * The Constructor. + * + * @param fileId the file id + */ public ChangeSet( String fileId ) { changes = new TreeSet(); this.fileId = fileId; } + /** + * Gets the file id. 
+ * + * @return the file id + */ public String getFileId() { return fileId; } + /* (non-Javadoc) + * @see java.lang.Object#toString() + */ @Override public String toString() { return "CS:" + fileId + "-" + changes.size(); diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/ModelDatabase.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/ModelDatabase.java index 6054c60..54e6807 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/ModelDatabase.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/ModelDatabase.java @@ -3,11 +3,15 @@ import java.util.List; import java.util.Map; +// TODO: Auto-generated Javadoc +/** + * The Interface ModelDatabase. + */ public interface ModelDatabase extends Runnable { /** - * lists all Models in the latest revision - * + * lists all Models in the latest revision. + * * @return List with all model IDs */ public List listModels(); @@ -20,22 +24,21 @@ public interface ModelDatabase extends Runnable { */ public Map listChanges(); - /** - * Returns the ChangeSet only for one specific model - * - * @param fileId + /** + * Returns the ChangeSet only for one specific model. + * + * @param fileId the file id * @return ChangeSet */ public ChangeSet getModelChanges( String fileId ); /** - * Cleans up the working directory + * Cleans up the working directory. */ public void cleanUp(); /** - * Starts the prozess of crawling for this specific Database - * + * Starts the prozess of crawling for this specific Database. 
*/ @Override public void run(); diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/exceptions/XmlNotFoundException.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/exceptions/XmlNotFoundException.java index b517046..2762620 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/exceptions/XmlNotFoundException.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/Interface/exceptions/XmlNotFoundException.java @@ -1,21 +1,45 @@ package de.unirostock.sems.ModelCrawler.databases.Interface.exceptions; +// TODO: Auto-generated Javadoc +/** + * The Class XmlNotFoundException. + */ public class XmlNotFoundException extends Exception { + /** The Constant serialVersionUID. */ private static final long serialVersionUID = 9151151806312439280L; + /** + * The Constructor. + */ public XmlNotFoundException() { super(); } + /** + * The Constructor. + * + * @param message the message + */ public XmlNotFoundException(String message) { super(message); } + /** + * The Constructor. + * + * @param cause the cause + */ public XmlNotFoundException(Throwable cause) { super(cause); } + /** + * The Constructor. 
+ * + * @param message the message + * @param cause the cause + */ public XmlNotFoundException(String message, Throwable cause) { super(message, cause); } diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrChange.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrChange.java index 82feabe..b4dedc6 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrChange.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrChange.java @@ -16,16 +16,43 @@ import de.unirostock.sems.XmlFileServerClient.exceptions.XmlFileServerBadRequestException; import de.unirostock.sems.XmlFileServerClient.exceptions.XmlFileServerProtocollException; +// TODO: Auto-generated Javadoc +/** + * The Class PmrChange. + */ public class PmrChange extends Change { + /** The Constant serialVersionUID. */ private static final long serialVersionUID = 4740459688628719898L; + + /** The repository url. */ protected transient String repositoryUrl = null; + + /** The file name. */ protected transient String fileName = null; + /** + * The Constructor. + * + * @param fileId the file id + * @param versionId the version id + * @param versionDate the version date + * @param crawledDate the crawled date + */ public PmrChange(String fileId, String versionId, Date versionDate, Date crawledDate) { super(fileId, versionId, versionDate, crawledDate); } + /** + * The Constructor. 
+ * + * @param repositoryUrl the repository url + * @param fileName the file name + * @param versionId the version id + * @param versionDate the version date + * @param crawledDate the crawled date + * @throws UnsupportedEncodingException the unsupported encoding exception + */ public PmrChange( String repositoryUrl, String fileName, String versionId, Date versionDate, Date crawledDate ) throws UnsupportedEncodingException { super( null, versionId, versionDate, crawledDate ); this.repositoryUrl = repositoryUrl; @@ -33,12 +60,26 @@ public PmrChange( String repositoryUrl, String fileName, String versionId, Date setFileId( XmlFileRepository.generateFileId(repositoryUrl, fileName) ); } + /** + * The Constructor. + * + * @param fileId the file id + * @param repositoryUrl the repository url + * @param fileName the file name + * @param versionId the version id + * @param versionDate the version date + * @param crawledDate the crawled date + * @throws UnsupportedEncodingException the unsupported encoding exception + */ public PmrChange( String fileId, String repositoryUrl, String fileName, String versionId, Date versionDate, Date crawledDate ) throws UnsupportedEncodingException { super( fileId, versionId, versionDate, crawledDate ); this.repositoryUrl = repositoryUrl; this.fileName = fileName; } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.Change#pushToXmlFileServer(de.unirostock.sems.XmlFileServerClient.XmlFileServer) + */ @Override public void pushToXmlFileServer( XmlFileServer server ) throws XmlNotFoundException, ModelAlreadyExistsException, XmlFileServerBadRequestException, UnsupportedUriException, XmlFileServerProtocollException, IOException { if( xmlFile == null ) @@ -64,6 +105,9 @@ public void pushToXmlFileServer( XmlFileServer server ) throws XmlNotFoundExcept } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.Change#toString() + */ @Override public String toString() { return "PmrChg:" + 
repositoryUrl+":"+fileName+"@"+getVersionId(); diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrChangeSet.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrChangeSet.java index 16768be..18cae6a 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrChangeSet.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrChangeSet.java @@ -2,12 +2,26 @@ import de.unirostock.sems.ModelCrawler.databases.Interface.ChangeSet; +// TODO: Auto-generated Javadoc +/** + * The Class PmrChangeSet. + */ public class PmrChangeSet extends ChangeSet { + /** + * The Constructor. + * + * @param fileId the file id + */ public PmrChangeSet(String fileId) { super(fileId); } + /** + * Adds the change. + * + * @param change the change + */ public void addChange( PmrChange change ) { super.addChange(change); } diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrDb.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrDb.java index cd14543..84f19a8 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrDb.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/PmrDb.java @@ -29,8 +29,6 @@ import java.util.Scanner; import java.util.UUID; -import javax.xml.parsers.ParserConfigurationException; - import org.apache.commons.io.FileUtils; import org.apache.commons.io.FilenameUtils; import org.apache.commons.logging.Log; @@ -40,14 +38,26 @@ import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.HttpClientBuilder; - -import com.aragost.javahg.Changeset; -import com.aragost.javahg.Repository; -import com.aragost.javahg.commands.ExecutionException; -import com.aragost.javahg.commands.LogCommand; -import com.aragost.javahg.commands.PullCommand; -import 
com.aragost.javahg.commands.UpdateCommand; - +import org.eclipse.jgit.api.Git; +import org.eclipse.jgit.api.LogCommand; +import org.eclipse.jgit.api.PullCommand; +import org.eclipse.jgit.api.PullResult; +import org.eclipse.jgit.api.errors.CheckoutConflictException; +import org.eclipse.jgit.api.errors.GitAPIException; +import org.eclipse.jgit.api.errors.InvalidRefNameException; +import org.eclipse.jgit.api.errors.RefAlreadyExistsException; +import org.eclipse.jgit.api.errors.RefNotFoundException; +import org.eclipse.jgit.diff.DiffEntry; +import org.eclipse.jgit.diff.DiffFormatter; +import org.eclipse.jgit.diff.RawTextComparator; +import org.eclipse.jgit.lib.Constants; +import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.revwalk.RevWalk; +import org.eclipse.jgit.util.io.DisabledOutputStream; + +import de.binfalse.bfutils.GeneralTools; import de.unirostock.sems.ModelCrawler.Properties; import de.unirostock.sems.ModelCrawler.databases.Interface.Change; import de.unirostock.sems.ModelCrawler.databases.Interface.ChangeSet; @@ -59,31 +69,72 @@ import de.unirostock.sems.morre.client.exception.MorreCommunicationException; import de.unirostock.sems.morre.client.exception.MorreException; + +/*import com.aragost.javahg.Changeset; +import com.aragost.javahg.Repository; +import com.aragost.javahg.commands.ExecutionException; +import com.aragost.javahg.commands.LogCommand; +import com.aragost.javahg.commands.PullCommand; +import com.aragost.javahg.commands.UpdateCommand;*/ +import org.eclipse.jgit.api.CheckoutCommand; + +// TODO: Auto-generated Javadoc +/** + * The Class PmrDb. + */ public class PmrDb implements ModelDatabase { + /** The Constant HASH_ALGO. */ private static final String HASH_ALGO = "MD5"; + /** The log. */ private final Log log = LogFactory.getLog( PmrDb.class ); + /** The working dir. */ protected File workingDir; + + /** The temp dir. 
*/ protected File tempDir; + + /** The config. */ protected java.util.Properties config; + + /** The morre client. */ protected MorreCrawlerInterface morreClient; + + /** The repo list uri. */ protected URI repoListUri; + + /** The classifier. */ protected DocumentClassifier classifier = null; + /** The file extension blacklist. */ protected HashSet fileExtensionBlacklist = null; + /** The change set map. */ protected Map changeSetMap = new HashMap(); // REMIND there is difference between ChangeSet and Changeset // ChangeSet is a ModelCrawler Dataholder class // and Changeset a JavaHg Dataholder class + /** + * The Constructor. + * + * @param morreClient the morre client + * @throws IllegalArgumentException the illegal argument exception + */ public PmrDb( MorreCrawlerInterface morreClient ) throws IllegalArgumentException { this( Properties.getProperty("de.unirostock.sems.ModelCrawler.PMR2.RepoList"), morreClient ); } + /** + * The Constructor. + * + * @param repoListUrl the repo list url + * @param morreClient the morre client + * @throws IllegalArgumentException the illegal argument exception + */ public PmrDb(String repoListUrl, MorreCrawlerInterface morreClient) throws IllegalArgumentException { this.morreClient = morreClient; @@ -101,11 +152,11 @@ public PmrDb(String repoListUrl, MorreCrawlerInterface morreClient) throws Illeg log.info( MessageFormat.format("Init new PMR2 Connector based on Repolist: {0}", this.repoListUri) ); // Prepare BiVeS Model Classifier - try { + //try { classifier = new DocumentClassifier (); - } catch (ParserConfigurationException e) { + /*} catch (ParserConfigurationException e) { log.fatal( "ParserConfigurationException while init BiVeS Document Classifier", e ); - } + }*/ if( log.isInfoEnabled() ) log.info("Started BiVeS Classifier"); @@ -121,21 +172,33 @@ public PmrDb(String repoListUrl, MorreCrawlerInterface morreClient) throws Illeg } } + /* (non-Javadoc) + * @see 
de.unirostock.sems.ModelCrawler.databases.Interface.ModelDatabase#listModels() + */ @Override public List listModels() { return new ArrayList( changeSetMap.keySet() ); } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.ModelDatabase#listChanges() + */ @Override public Map listChanges() { return changeSetMap; } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.ModelDatabase#getModelChanges(java.lang.String) + */ @Override public ChangeSet getModelChanges(String fileId) { return changeSetMap.get(fileId); } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.ModelDatabase#cleanUp() + */ @Override public void cleanUp() { // save the config @@ -149,6 +212,9 @@ public void cleanUp() { } } + /* (non-Javadoc) + * @see de.unirostock.sems.ModelCrawler.databases.Interface.ModelDatabase#run() + */ @Override public void run() { @@ -171,7 +237,7 @@ public void run() { Iterator iter = repositories.iterator(); while( iter.hasNext() ) { - Repository repo = null; + Git repo = null; boolean hasChanges = false; String repoName = iter.next(); @@ -200,7 +266,7 @@ public void run() { log.debug( MessageFormat.format("Repository {0} is known. Perform a Pull-Request into local copy {1}", repoName, location.getAbsolutePath()) ); // Repo is already known -> make a pull - Entry pullResult = pullRepository(location); + Entry pullResult = pullRepository(location); repo = pullResult.getKey(); // are there changes in the Repo? hasChanges = pullResult.getValue(); @@ -231,6 +297,9 @@ public void run() { } + /** + * Check and init working dir. + */ protected void checkAndInitWorkingDir() { workingDir = new File( Properties.getWorkingDir(), Properties.getProperty("de.unirostock.sems.ModelCrawler.PMR2.subWorkingDir") ); @@ -269,8 +338,9 @@ protected void checkAndInitWorkingDir() { } /** - * Returns a non existent temporary file - * @return + * Returns a non existent temporary file. 
+ * + * @return the temp file */ protected File getTempFile() { File temp = new File( tempDir, UUID.randomUUID().toString() ); @@ -281,6 +351,9 @@ protected File getTempFile() { return temp; } + /** + * Save properties. + */ protected void saveProperties() { if( config == null ) { @@ -299,10 +372,10 @@ protected void saveProperties() { /** - * Retrieves the txt Repository List and puts it in a list - * - * @return - * @throws HttpException + * Retrieves the txt Repository List and puts it in a list. + * + * @return the repository list + * @throws HttpException the http exception */ protected List getRepositoryList() throws HttpException { List repoList = new LinkedList(); @@ -333,10 +406,10 @@ protected List getRepositoryList() throws HttpException { /** - * Creates the directory for the given Repository - * - * @param repository - * @return + * Creates the directory for the given Repository. + * + * @param repository the repository + * @return the file */ protected File makeRepositoryDirectory( String repository ) { @@ -383,9 +456,9 @@ protected File makeRepositoryDirectory( String repository ) { } /** - * Gets the Path to the Repository Directory out of Workspace config or null if it fails - * - * @param repository + * Gets the Path to the Repository Directory out of Workspace config or null if it fails. + * + * @param repository the repository * @return File */ protected File getRepositoryDirectory( String repository ) { @@ -399,10 +472,10 @@ protected File getRepositoryDirectory( String repository ) { } /** - * Calculates the hash from the Repository URL - * - * @param repository - * @return + * Calculates the hash from the Repository URL. 
+ * + * @param repository the repository + * @return the string */ private String calculateRepositoryHash( String repository ) { String repoHash = null; @@ -418,40 +491,91 @@ private String calculateRepositoryHash( String repository ) { return repoHash; } - protected Repository cloneRepository(File local, String remote) { - Repository repo = Repository.clone(local, remote); + /** + * Clone repository. + * + * @param local the local + * @param remote the remote + * @return the repository + */ + protected Git cloneRepository(File local, String remote) { + + + + Git repo = null; + + try + { + repo = Git.cloneRepository() + .setURI( remote ) + .setDirectory( local ) + .setCloneSubmodules(true) // include all submodules -> important for PMR2-Project + .call(); + } + catch (GitAPIException e) + { + log.fatal (MessageFormat.format("Can not clone Mercurial Repository {0} into {1}", remote, local.getAbsolutePath()), e); + } + //Repository.clone(local, remote); if( repo == null ) log.fatal( MessageFormat.format("Can not clone Mercurial Repository {0} into {1}", remote, local.getAbsolutePath()) ); return repo; } - protected Entry pullRepository(File location) { + /** + * Pull repository. 
+ * + * @param location the location + * @return the entry< repository, boolean> + */ + protected Entry pullRepository(File location) { boolean hasChanges = false; - Repository repo = Repository.open(location); + + Git repo = null; + + try + { + repo = Git.open (location); + } + catch (IOException e) + { + log.fatal( MessageFormat.format("Can not open Git Repository in {0}", location.getAbsolutePath()), e); + } if( repo != null) { - PullCommand pull = new PullCommand(repo); + //PullCommand pull = new PullCommand(repo); + PullCommand pull = repo.pull (); try { - List changes = pull.execute(); + PullResult pr = pull.call (); + if (pr.isSuccessful () && pr.getFetchResult ().getTrackingRefUpdates ().size () > 0) + + /*List changes = pull.execute(); // when pull was successful and there are some Changes - if( pull.isSuccessful() && changes.size() > 0) + if( pull.isSuccessful() && changes.size() > 0)*/ hasChanges = true; - } catch (IOException e) { - log.fatal( MessageFormat.format("Can not pull Mercurial Repository into {0}", location.getAbsolutePath()), e); + } catch (GitAPIException e) { + log.fatal( MessageFormat.format("Can not pull Git Repository into {0}", location.getAbsolutePath()), e); } } - return new AbstractMap.SimpleEntry(repo, hasChanges); + return new AbstractMap.SimpleEntry(repo, hasChanges); } - protected void scanAndTransferRepository( String repoUrl, File location, Repository repo ) { + /** + * Scan and transfer repository. + * + * @param repoUrl the repo url + * @param location the location + * @param repo the repo + */ + protected void scanAndTransferRepository( String repoUrl, File location, Git repo ) { // select all relevant files // than going throw the versions List relevantFiles; - List relevantVersions; + Iterable relevantVersions; // TODO Logging! @@ -488,12 +612,12 @@ protected void scanAndTransferRepository( String repoUrl, File location, Reposit return; // sorting them (just in case...) 
- Collections.sort(relevantVersions, new Comparator() { + /*Collections.sort(relevantVersions, new Comparator() { @Override public int compare(Changeset cs1, Changeset cs2) { return cs1.getTimestamp().getDate().compareTo( cs2.getTimestamp().getDate() ); } - } ); + } );*/ // make it! // (going throw each relevant Version and saves all relevant Files in every relevant - and new - Version) @@ -513,7 +637,14 @@ public int compare(Changeset cs1, Changeset cs2) { } - protected List scanRepository( File location, Repository repo ) { + /** + * Scan repository. + * + * @param location the location + * @param repo the repo + * @return the list< relevant file> + */ + protected List scanRepository( File location, Git repo ) { List relevantFiles = new LinkedList(); // scans the directory recursively @@ -522,6 +653,13 @@ protected List scanRepository( File location, Repository repo ) { return relevantFiles; } + /** + * Scan repository dir. + * + * @param base the base + * @param dir the dir + * @param relevantFiles the relevant files + */ private void scanRepositoryDir( File base, File dir, List relevantFiles ) { if( log.isTraceEnabled() ) @@ -570,11 +708,11 @@ else if( entry.isFile() && entry.exists() ) { /** * Checks if the file is a model aka relevant
- * Returns a RelevantFile object if it is or null - * - * @param base - * @param model - * @return + * Returns a RelevantFile object if it is or null. + * + * @param base the base + * @param model the model + * @return the relevant file */ private RelevantFile isRelevant( File base, File model ) { int type = 0; @@ -607,6 +745,11 @@ private RelevantFile isRelevant( File base, File model ) { return relevantFile; } + /** + * Search latest known version. + * + * @param relevantFile the relevant file + */ protected void searchLatestKnownVersion( RelevantFile relevantFile ) { String versionId = null; Date versionDate = null; @@ -665,14 +808,23 @@ else if( log.isDebugEnabled() ) { } - protected List detectRelevantVersions( Repository repo, List relevantFiles ) { - String[] files; + /** + * Detect relevant versions. + * + * @param repo the repo + * @param relevantFiles the relevant files + * @return the list< changeset> + */ + protected Iterable detectRelevantVersions( Git repo, List relevantFiles ) { + //String[] files; Date oldestLatestVersionDate = null; boolean foundOldestLatestVersionDate = false; - List relevantVersions = null; + Iterable + //List + relevantVersions = null; if( log.isInfoEnabled() ) - log.info("start detection of relevant hg versions"); + log.info("start detection of relevant git versions"); if( relevantFiles.size() == 0 ) { if( log.isInfoEnabled() ) @@ -682,14 +834,17 @@ protected List detectRelevantVersions( Repository repo, List fileIter = relevantFiles.iterator(); while( fileIter.hasNext() ) { RelevantFile file = fileIter.next(); - files[index] = file.getFilePath(); + //files[index] = + logCmd.addPath (file.getFilePath()); index++; // checks if the current processed relevantFile has an older latestVersion as the @@ -705,32 +860,50 @@ else if( file.getLatestVersionDate().compareTo(oldestLatestVersionDate) < 0 ) { } } - + if( log.isDebugEnabled() ) log.debug( MessageFormat.format("execute Log command for {0} file(s)", index) ); // perform the log 
command to evaluate all interesting hg changesets - LogCommand logCmd = new LogCommand(repo); - relevantVersions = logCmd.execute(files); + //Iterable logs + try + { + relevantVersions = logCmd.call(); + } + catch (GitAPIException e) + { + log.error( "cannot call git log ", e ); + } + /*LogCommand logCmd = new LogCommand(repo); + relevantVersions = logCmd.execute(files);*/ + int numVersions = GeneralTools.sizeOfIterable (relevantVersions);/*0; + if (relevantVersions instanceof Collection) + numVersions = ((Collection)relevantVersions).size(); + else + for(RevCommit v : relevantVersions) { + numVersions++; + }*/ + if( oldestLatestVersionDate == null ) { // oldestLatestVersionDate is null -> there is no latest version known for any of the relevantFiles/-Models if( log.isInfoEnabled() ) - log.info( MessageFormat.format("Found {0} Changesets. Can not skip any of them, because no one is indexed", relevantVersions.size()) ); + log.info( MessageFormat.format("Found {0} Changesets. Can not skip any of them, because no one is indexed", numVersions) ); } else { if( log.isInfoEnabled() ) - log.info( MessageFormat.format("Found {0} Changesets, removes all Changeset older as {1} (oldestLatestVersion) from the list", relevantVersions.size(), oldestLatestVersionDate) ); + log.info( MessageFormat.format("Found {0} Changesets, removes all Changeset older as {1} (oldestLatestVersion) from the list", numVersions, oldestLatestVersionDate) ); // remove every Changeset which is older as the oldestLatestVersion (because they are really uninteresting) - Iterator changesetIter = relevantVersions.iterator(); - while( changesetIter.hasNext() ) { - if( changesetIter.next().getTimestamp().getDate().compareTo(oldestLatestVersionDate) < 0 ) + Iterator changesetIter = relevantVersions.iterator(); + while( changesetIter.hasNext() ) + { + if (new Date (changesetIter.next().getCommitTime ()).compareTo(oldestLatestVersionDate) < 0) changesetIter.remove(); } if( log.isInfoEnabled() ) - log.info( 
MessageFormat.format("{0} Changsets left for examination", relevantVersions.size()) ); + log.info( MessageFormat.format("{0} Changsets left for examination", GeneralTools.sizeOfIterable (relevantVersions)) ); } @@ -738,36 +911,66 @@ else if( file.getLatestVersionDate().compareTo(oldestLatestVersionDate) < 0 ) { return relevantVersions; } - protected void iterateRelevantVersions( Repository repo, File location, List relevantFiles, List relevantVersions ) throws IOException { + /** + * Iterate relevant versions. + * + * @param repo the repo + * @param location the location + * @param relevantFiles the relevant files + * @param relevantVersions the relevant versions + * @throws IOException the IO exception + */ + protected void iterateRelevantVersions( Git repo, File location, List relevantFiles, Iterable relevantVersions ) throws IOException { Date crawledDate = new Date(); if( log.isInfoEnabled() ) log.info( MessageFormat.format("Going throw all relevant versions of {0}", location) ); - for( Changeset currentChangeset : relevantVersions ) { - String currentNodeId = currentChangeset.getNode(); - Date currentVersionDate = currentChangeset.getTimestamp().getDate(); + for( RevCommit currentChangeset : relevantVersions ) { + // node a changeset ID, must be 40 hexadecimal characters. 
+ ObjectId currentNodeId = currentChangeset.getId ();//.getNode(); + Date currentVersionDate = new Date (currentChangeset.getCommitTime ());//.getTimestamp().getDate(); if( log.isInfoEnabled() ) - log.info( MessageFormat.format("Update to {0} Message: {1}", currentNodeId, currentChangeset.getMessage()) ); + log.info( MessageFormat.format("Update to {0} Message: {1}", currentNodeId.toString (), currentChangeset.getShortMessage ()) ); // update to currentChangeset - UpdateCommand updateCmd = new UpdateCommand(repo); - updateCmd.rev(currentChangeset); + + //UpdateCommand updateCmd = new UpdateCommand(repo); + CheckoutCommand co = repo.checkout ().setStartPoint (currentChangeset); + //updateCmd.rev(currentChangeset); try { - updateCmd.execute(); - } catch (IOException e) { - log.error( MessageFormat.format("IOException while updating {0} to {1}. skip this repo after now.", location, currentNodeId), e); + co.call ();//.execute(); + } catch (GitAPIException e) { + log.error( MessageFormat.format("IOException while updating {0} to {1}. skip this repo after now.", location, currentNodeId.toString ()), e); return; - } catch (ExecutionException e) { - log.error( MessageFormat.format("IOException while updating {0} to {1}. skip this repo after now.", location, currentNodeId), e); + }/* catch (ExecutionException e) { + log.error( MessageFormat.format("IOException while updating {0} to {1}. 
skip this repo after now.", location, currentNodeId.toString ()), e); return; - } - + }*/ + + // get all added or modified files in this Changeset - List changedFiles = new ArrayList(); + List changedFiles = new ArrayList();/* changedFiles.addAll( currentChangeset.getAddedFiles() ); - changedFiles.addAll( currentChangeset.getModifiedFiles() ); + changedFiles.addAll( currentChangeset.getModifiedFiles() );*/ + + + + Repository repository = repo.getRepository (); + RevWalk rw = new RevWalk(repository); + ObjectId head = repository.resolve(Constants.HEAD); + RevCommit commit = rw.parseCommit (head); + RevCommit parent = rw.parseCommit(commit.getParent(0).getId()); + DiffFormatter df = new DiffFormatter(DisabledOutputStream.INSTANCE); + df.setRepository(repository); + df.setDiffComparator(RawTextComparator.DEFAULT); + df.setDetectRenames(true); + List diffs = df.scan(parent.getTree(), commit.getTree()); + for (DiffEntry diff : diffs) { + changedFiles.add (diff.getNewPath()); + } + if( log.isInfoEnabled() ) log.info( MessageFormat.format("{0} changed files in this version", changedFiles.size()) ); @@ -824,7 +1027,7 @@ protected void iterateRelevantVersions( Repository repo, File location, List 0 ) diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/RelevantFile.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/RelevantFile.java index 3f98dc1..0354943 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/RelevantFile.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/RelevantFile.java @@ -6,49 +6,98 @@ import de.unirostock.sems.ModelCrawler.XmlFileRepository.XmlFileRepository; import de.unirostock.sems.ModelCrawler.databases.Interface.Change; +// TODO: Auto-generated Javadoc +/** + * The Class RelevantFile. + */ public class RelevantFile { + /** The file path. */ private String filePath; + + /** The repo url. 
*/ private String repoUrl = null; + + /** The file id. */ private String fileId; + + /** The latest known version id. */ private String latestKnownVersionId = null; + + /** The latest known version date. */ private Date latestKnownVersionDate = null; + + /** The type. */ private int type = 0; + /** The change set. */ private PmrChangeSet changeSet = null; + /** + * The Constructor. + * + * @param filePath the file path + * @param fileId the file id + */ public RelevantFile( String filePath, String fileId ) { this.filePath = filePath; this.fileId = fileId; } + /** + * The Constructor. + * + * @param filePath the file path + */ public RelevantFile( String filePath ) { this.filePath = filePath; } + /** + * Generate file id. + * + * @param repoUrl the repo url + * @return the string + * @throws UnsupportedEncodingException the unsupported encoding exception + */ public String generateFileId( String repoUrl ) throws UnsupportedEncodingException { this.repoUrl = repoUrl; return this.fileId = XmlFileRepository.generateFileId(repoUrl, filePath); } + /** + * Gets the file path. + * + * @return the file path + */ public String getFilePath() { return filePath; } + /** + * Gets the repository url. + * + * @return the repository url + */ public String getRepositoryUrl() { return repoUrl; } + /** + * Gets the file id. + * + * @return the file id + */ public String getFileId() { return fileId; } /** - * Sets the latest known Version of this model and the changeSet of it - * - * @param latestVersionId - * @param latestVersionDate - * @param changeSet + * Sets the latest known Version of this model and the changeSet of it. 
+ * + * @param latestVersionId the latest version id + * @param latestVersionDate the latest version date + * @param changeSet the change set */ public void setLatestKnownVersion( String latestVersionId, Date latestVersionDate, PmrChangeSet changeSet ) { this.latestKnownVersionId = latestVersionId; @@ -57,10 +106,10 @@ public void setLatestKnownVersion( String latestVersionId, Date latestVersionDat } /** - * Sets the latest known Version of this model - * - * @param latestVersionId - * @param latestVersionDate + * Sets the latest known Version of this model. + * + * @param latestVersionId the latest version id + * @param latestVersionDate the latest version date */ public void setLatestKnownVersion( String latestVersionId, Date latestVersionDate ) { setLatestKnownVersion( latestVersionId, latestVersionDate, null ); @@ -133,8 +182,8 @@ public Date getLatestVersionDate() { } /** - * Return the changeSet or null, if no one was setted and no change added - * + * Return the changeSet or null, if no one was setted and no change added. + * * @return PmrChangeSet or null */ public PmrChangeSet getChangeSet() { @@ -143,8 +192,8 @@ public PmrChangeSet getChangeSet() { /** * Adds a change to the changeSet and creates one if necessary. - * - * @param change + * + * @param change the change */ public void addChange( PmrChange change ) { @@ -161,10 +210,20 @@ public void addChange( PmrChange change ) { changeSet.addChange(change); } + /** + * Gets the type. + * + * @return the type + */ public int getType() { return type; } + /** + * Sets the type. 
+ * + * @param type the type + */ public void setType(int type) { this.type = type; } diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/exceptions/HttpException.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/exceptions/HttpException.java index 7584430..4dff418 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/exceptions/HttpException.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/databases/PMR2/exceptions/HttpException.java @@ -2,21 +2,45 @@ import java.io.IOException; +// TODO: Auto-generated Javadoc +/** + * The Class HttpException. + */ public class HttpException extends IOException { + /** The Constant serialVersionUID. */ private static final long serialVersionUID = 8155646399693553499L; + /** + * The Constructor. + */ public HttpException() { } + /** + * The Constructor. + * + * @param message the message + */ public HttpException(String message) { super(message); } + /** + * The Constructor. + * + * @param cause the cause + */ public HttpException(Throwable cause) { super(cause); } + /** + * The Constructor. + * + * @param message the message + * @param cause the cause + */ public HttpException(String message, Throwable cause) { super(message, cause); } diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/helper/CrawledModelRecord.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/helper/CrawledModelRecord.java index 4198c7f..7942d1d 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/helper/CrawledModelRecord.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/helper/CrawledModelRecord.java @@ -10,11 +10,24 @@ import de.unirostock.sems.morre.client.dataholder.CrawledModel; +// TODO: Auto-generated Javadoc +/** + * The Class CrawledModelRecord. + */ public class CrawledModelRecord extends CrawledModel { + /** The Constant serialVersionUID. 
*/ private static final long serialVersionUID = 6382870895044981027L; + + /** The Constant DATE_FORMAT. */ public static final String DATE_FORMAT = "dd.MM.yyyy-HH:mm:ss"; + /** + * Extend dataholder. + * + * @param model the model + * @return the crawled model record + */ public static CrawledModelRecord extendDataholder( CrawledModel model ) { if( model != null ) return new CrawledModelRecord(model); @@ -23,24 +36,54 @@ public static CrawledModelRecord extendDataholder( CrawledModel model ) { } /** - * Creates a new CrawledModelRecord based on a simple CrawledModel Dataholder - * - * @param model + * Creates a new CrawledModelRecord based on a simple CrawledModel Dataholder. + * + * @param model the model */ public CrawledModelRecord( CrawledModel model ) { super( model.getFileId(), model.getVersionId(), model.getXmldoc(), model.getParentMap(), model.getMetaMap(), model.getModelType() ); } + /** + * The Constructor. + * + * @param fileId the file id + * @param versionId the version id + * @param xmldoc the xmldoc + * @param parentMap the parent map + * @param metaMap the meta map + * @param modelType the model type + */ public CrawledModelRecord(String fileId, String versionId, String xmldoc, Map> parentMap, Map metaMap, String modelType) { super(fileId, versionId, xmldoc, parentMap, metaMap, modelType); } + /** + * The Constructor. + * + * @param fileId the file id + * @param versionId the version id + * @param xmldoc the xmldoc + * @param parentMap the parent map + * @param metaMap the meta map + * @param modelType the model type + * @param versionDate the version date + * @param crawledDate the crawled date + */ public CrawledModelRecord(String fileId, String versionId, String xmldoc, Map> parentMap, Map metaMap, String modelType, Date versionDate, Date crawledDate) { super(fileId, versionId, xmldoc, parentMap, metaMap, modelType); setVersionDate(versionDate); setCrawledDate(crawledDate); } + /** + * The Constructor. 
+ * + * @param fileId the file id + * @param versionId the version id + * @param versionDate the version date + * @param crawledDate the crawled date + */ public CrawledModelRecord(String fileId, String versionId, Date versionDate, Date crawledDate) { super(fileId, versionId, null, null, null, null); setVersionDate(versionDate); @@ -49,8 +92,8 @@ public CrawledModelRecord(String fileId, String versionId, Date versionDate, Dat /** * Checks if the model dataholder is valid. - * - * @return + * + * @return true, if checks if is available */ public boolean isAvailable() { @@ -67,10 +110,10 @@ public boolean isAvailable() { } /** - * Returns the value of a meta field or null - * - * @param metaField - * @return + * Returns the value of a meta field or null. + * + * @param metaField the meta field + * @return the meta */ public String getMeta( String metaField ) { Map metaMap = getMetaMap(); @@ -81,10 +124,10 @@ public String getMeta( String metaField ) { } /** - * Sets the value of a meta field and overrides the previous value - * - * @param metaField - * @param value + * Sets the value of a meta field and overrides the previous value. + * + * @param metaField the meta field + * @param value the value */ public void setMeta( String metaField, String value ) { Map metaMap = getMetaMap(); @@ -96,9 +139,9 @@ public void setMeta( String metaField, String value ) { } /** - * Returns the parsed VersionDate if it is set, or null - * - * @return + * Returns the parsed VersionDate if it is set, or null. + * + * @return the version date */ public Date getVersionDate() { Date versionDate = null; @@ -117,18 +160,18 @@ public Date getVersionDate() { } /** - * Sets the VersionDate in the Meta Field - * - * @param versionDate + * Sets the VersionDate in the Meta Field. 
+ * + * @param versionDate the version date */ public void setVersionDate( Date versionDate ) { setMeta(META_VERSION_DATE, new SimpleDateFormat(DATE_FORMAT).format(versionDate) ); } /** - * Returns the parsed CrawledDate if it is set, or null - * - * @return + * Returns the parsed CrawledDate if it is set, or null. + * + * @return the crawled date */ public Date getCrawledDate() { Date crawledDate = null; @@ -147,9 +190,9 @@ public Date getCrawledDate() { } /** - * Sets the CrawledDate in the Meta Field - * - * @param versionDate + * Sets the CrawledDate in the Meta Field. + * + * @param crawledDate the crawled date */ public void setCrawledDate( Date crawledDate ) { setMeta(META_CRAWLED_DATE, new SimpleDateFormat(DATE_FORMAT).format(crawledDate) ); @@ -157,9 +200,9 @@ public void setCrawledDate( Date crawledDate ) { /** * Adds a parent to this model. - * - * @param parentFileId - * @param parentVersionId + * + * @param parentFileId the parent file id + * @param parentVersionId the parent version id */ public void addParent( String parentFileId, String parentVersionId ) { @@ -187,9 +230,9 @@ public void addParent( String parentFileId, String parentVersionId ) { } /** - * Adds a parent to this model with the same fileId - * - * @param parentVersionId + * Adds a parent to this model with the same fileId. 
+ * + * @param parentVersionId the parent version id */ public void addParent( String parentVersionId ) { addParent( getFileId(), parentVersionId ); diff --git a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/helper/RelativPath.java b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/helper/RelativPath.java index d9cd744..82871ab 100644 --- a/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/helper/RelativPath.java +++ b/ModelCrawler/src/main/java/de/unirostock/sems/ModelCrawler/helper/RelativPath.java @@ -4,11 +4,15 @@ import java.io.IOException; import java.util.regex.Pattern; +// TODO: Auto-generated Javadoc +/** + * The Class RelativPath. + */ public class RelativPath { /** * Returns the path of one File relative to another.
- * copy'n'paste from {@link http://stackoverflow.com/a/1269907} + * copy'n'paste from stackoverflow * * @param target the target directory * @param base the base directory