/*
 * Syncany, www.syncany.org
 * Copyright (C) 2011-2016 Philipp C. Heckel <philipp.heckel@gmail.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.syncany.chunk;

import java.io.File;
import java.io.IOException;
import java.util.Enumeration;

/**
 * The chunker implements a core part of the deduplication process by breaking
 * files into individual {@link Chunk}s. A chunker emits an enumeration of chunks,
 * allowing the application to process one chunk after the other.
 *
 * <p>Note: Implementations should never read the entire file into memory at once,
 * but instead use an input stream for processing.
 *
 * @author Philipp C. Heckel (philipp.heckel@gmail.com)
 */
public abstract class Chunker {
	/**
	 * Property used by the config to indicate the exact or
	 * approximate size of a chunk. In bytes.
	 */
	public static final String PROPERTY_SIZE = "size";

	/**
	 * Opens the given file and creates an enumeration of {@link Chunk}s. This method
	 * should not read the file into memory at once, but instead read and emit new
	 * chunks when requested using {@link Enumeration#nextElement() nextElement()}.
	 *
	 * <p>The enumeration must be closed by the {@link ChunkEnumeration#close() close()}
	 * method to remove any possible locks.
	 *
	 * @param file The file that is supposed to be chunked
	 * @return An enumeration of individual chunks, must be closed at the end of processing
	 * @throws IOException If any file exceptions occur
	 */
	public abstract ChunkEnumeration createChunks(File file) throws IOException;

	/**
	 * Returns a string representation of the chunker implementation.
	 *
	 * <p>Redeclared as abstract so that every concrete chunker has to provide its
	 * own implementation instead of inheriting the one from {@link Object}.
	 *
	 * @return A string representation of the chunker implementation
	 */
	@Override
	public abstract String toString();

	/**
	 * Returns the checksum algorithm used by the chunker to calculate the chunk
	 * and file checksums. For the deduplication process to function properly,
	 * the checksum algorithms of all chunkers must be equal.
	 *
	 * @return The checksum algorithm used by this chunker
	 */
	public abstract String getChecksumAlgorithm();

	/**
	 * The chunk enumeration is implemented by the actual chunkers and emits a new
	 * chunk when {@link ChunkEnumeration#nextElement() nextElement()} is called. When no more
	 * elements are available, {@link ChunkEnumeration#hasMoreElements() hasMoreElements()} returns
	 * false. Any open streams must be closed with {@link ChunkEnumeration#close() close()}.
	 */
	// NOTE(review): if the project targets Java 7 or newer, this interface could also
	// extend AutoCloseable so callers can use try-with-resources -- confirm the language
	// level before changing it.
	public interface ChunkEnumeration extends Enumeration<Chunk> {
		/**
		 * Returns true if the chunker can return at least one more chunk.
		 *
		 * @return <tt>true</tt> if at least one more chunk is available, <tt>false</tt> otherwise
		 */
		@Override
		boolean hasMoreElements();

		/**
		 * Returns the next chunk (if there are any).
		 *
		 * @return The next chunk
		 */
		@Override
		Chunk nextElement();

		/**
		 * Closes the file opened by the {@link Chunker#createChunks(File) createChunks()} method.
		 * This method must be called at the end of processing to release any read-/write locks.
		 */
		void close();
	}
}