001/*
002 * Syncany, www.syncany.org
003 * Copyright (C) 2011-2016 Philipp C. Heckel <philipp.heckel@gmail.com> 
004 *
005 * This program is free software: you can redistribute it and/or modify
006 * it under the terms of the GNU General Public License as published by
007 * the Free Software Foundation, either version 3 of the License, or
008 * (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013 * GNU General Public License for more details.
014 *
015 * You should have received a copy of the GNU General Public License
016 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
017 */
018package org.syncany.chunk;
019
020import java.io.File;
021import java.io.IOException;
022import java.util.Enumeration;
023
024/**
025 * The chunker implements a core part of the deduplication process by breaking
026 * files into individual {@link Chunk}s. A chunker emits an enumeration of chunks,
027 * allowing the application to process one chunk after the other. 
028 * 
029 * <p>Note: Implementations should never read the entire file into memory at once,
030 *          but instead use an input stream for processing.
031 * 
032 * @author Philipp C. Heckel (philipp.heckel@gmail.com)
033 */
034public abstract class Chunker { 
035        /**
036         * Property used by the config to indicate the exact or 
037         * approximate size of a chunk. In bytes. 
038         */
039        public static final String PROPERTY_SIZE = "size";
040        
041    /**
042     * Opens the given file and creates enumeration of {@link Chunk}s. This method 
043     * should not read the file into memory at once, but instead read and emit new 
044     * chunks when requested using {@link Enumeration#nextElement() nextElement()}.
045     * 
046     * <p>The enumeration must be closed by the {@link ChunkEnumeration#close() close()} 
047     * method to remove any possible locks.
048     * 
049     * @param file The file that is supposed to be chunked
050     * @return An enumeration of individual chunks, must be closed at the end of processing
051     * @throws IOException If any file exceptions occur
052     */ 
053        public abstract ChunkEnumeration createChunks(File file) throws IOException;
054                        
055        /**
056         * Returns a string representation of the chunker implementation.
057         */
058    public abstract String toString();
059    
060    /**
061     * Returns the checksum algorithm used by the chunker to calculate the chunk
062     * and file checksums. For the deduplication process to function properly,
063     * the checksum algorithms of all chunkers must be equal. 
064     */
065    public abstract String getChecksumAlgorithm();
066    
067    /**
068     * The chunk enumeration is implemented by the actual chunkers and emits a new
069     * chunk when {@link ChunkEnumeration#nextElement() nextElement()} is called. When no more 
070     * elements are available, {@link ChunkEnumeration#hasMoreElements() hasMoreElements()} returns
071     * false. Any open streams must be closed with {@link ChunkEnumeration#close() close()}.
072     */
073    public static interface ChunkEnumeration extends Enumeration<Chunk> {
074        /**
075         * Returns true if the chunker can return at least one more chunk.
076         */
077        public boolean hasMoreElements();
078        
079        /**
080         * Returns the next chunk (if there are any). 
081         */
082        public Chunk nextElement();
083        
084        /**
085         * Closes the file opened by the {@link Chunker#createChunks(File) createChunks()} method.  
086         * This method must be called at the end of processing to release any read-/write locks.
087         */
088        public void close();   
089    }
090}