/*
 * Syncany, www.syncany.org
 * Copyright (C) 2011-2016 Philipp C. Heckel <philipp.heckel@gmail.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.syncany.chunk;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * The fixed chunker is an implementation of the {@link Chunker}. It implements a simple
 * fixed-offset chunking, i.e. it breaks files at multiples of the given chunk size
 * parameter.
 *
 * <p>While it is very fast due to its offset-based approach (and not content-based), it
 * performs very badly when bytes are added or removed from the beginning of a file.
 *
 * <p>Details can be found in chapter 3.4 of the thesis at <a href="http://blog.philippheckel.com/2013/05/20/minimizing-remote-storage-usage-and-synchronization-time-using-deduplication-and-multichunking-syncany-as-an-example/3/#Fixed-Size%20Chunking">blog.philippheckel.com</a>.
 * The <code>FixedChunker</code> implements the chunker described in chapter 3.4.2.
 *
 * @author Philipp C. Heckel (philipp.heckel@gmail.com)
 */
public class FixedChunker extends Chunker {
	private static final Logger logger = Logger.getLogger(FixedChunker.class.getSimpleName());

	public static final String DEFAULT_DIGEST_ALG = "SHA1";
	public static final String TYPE = "fixed";

	private final int chunkSize;
	private final String checksumAlgorithm;

	/**
	 * Creates a new fixed offset chunker with the default file/chunk
	 * checksum algorithm SHA1.
	 *
	 * @param chunkSize Size of a chunk in bytes; must be greater than zero
	 * @throws IllegalArgumentException If <code>chunkSize</code> is zero or negative
	 */
	public FixedChunker(int chunkSize) {
		this(chunkSize, DEFAULT_DIGEST_ALG);
	}

	/**
	 * Creates a new fixed offset chunker.
	 *
	 * @param chunkSize Size of a chunk in bytes; must be greater than zero
	 * @param checksumAlgorithm Algorithm to calculate the chunk and file checksums (e.g. SHA1, MD5)
	 * @throws IllegalArgumentException If <code>chunkSize</code> is zero or negative
	 */
	public FixedChunker(int chunkSize, String checksumAlgorithm) {
		// A zero-sized buffer would make every read return 0 bytes (endless empty chunks),
		// and a negative size would only fail later when the buffer is allocated.
		if (chunkSize <= 0) {
			throw new IllegalArgumentException("Chunk size must be greater than zero, but was " + chunkSize);
		}

		this.chunkSize = chunkSize;
		this.checksumAlgorithm = checksumAlgorithm;
	}

	/**
	 * Opens the given file and returns an enumeration that breaks its content
	 * into chunks of (at most) the configured chunk size.
	 *
	 * @param file File to be chunked
	 * @return Enumeration over the chunks of the file; it owns the opened stream
	 * @throws IOException If the file cannot be opened for reading
	 */
	@Override
	public ChunkEnumeration createChunks(File file) throws IOException {
		return new FixedChunkEnumeration(new FileInputStream(file));
	}

	@Override
	public String getChecksumAlgorithm() {
		return checksumAlgorithm;
	}

	@Override
	public String toString() {
		return "Fixed-" + chunkSize + "-" + checksumAlgorithm;
	}

	/**
	 * Enumeration that reads an input stream in blocks of the enclosing chunker's
	 * chunk size and wraps every block in a {@link Chunk}. A chunk checksum is
	 * calculated for each block; a checksum over all bytes read so far is attached
	 * to the chunk that is detected as the last one.
	 *
	 * <p>The stream is closed by this class as soon as the last chunk was read,
	 * when an error occurs, or when {@link #close()} is called.
	 */
	public class FixedChunkEnumeration implements ChunkEnumeration {
		private final MessageDigest digest;
		private final MessageDigest fileDigest;

		private final InputStream in;
		private final byte[] buffer;
		private boolean closed;

		/**
		 * Creates an enumeration over the given stream, using the chunk size and
		 * checksum algorithm of the enclosing chunker.
		 *
		 * @param in Stream to read the chunks from; it is closed by this enumeration
		 * @throws RuntimeException If the checksum algorithm is not available; the
		 *         given stream is closed in that case
		 */
		public FixedChunkEnumeration(InputStream in) {
			this.in = in;
			this.buffer = new byte[chunkSize];
			this.closed = false;

			MessageDigest chunkDigest;
			MessageDigest wholeFileDigest;

			try {
				chunkDigest = MessageDigest.getInstance(checksumAlgorithm);
				wholeFileDigest = MessageDigest.getInstance(checksumAlgorithm);
			}
			catch (NoSuchAlgorithmException e) {
				// Nobody else holds a reference to this half-built object, so the
				// stream would leak if it was not closed here.
				if (in != null) {
					try {
						in.close();
					}
					catch (IOException closeException) {
						logger.log(Level.INFO, "Error while closing", closeException);
					}
				}

				throw new RuntimeException(e);
			}

			this.digest = chunkDigest;
			this.fileDigest = wholeFileDigest;
		}

		/**
		 * Checks whether the stream reports more readable bytes.
		 *
		 * @return <code>false</code> if the enumeration is closed, if the stream reports
		 *         no available bytes, or if querying the stream fails; <code>true</code> otherwise
		 */
		@Override
		public boolean hasMoreElements() {
			if (closed) {
				return false;
			}

			try {
				return in.available() > 0;
			}
			catch (IOException ex) {
				if (logger.isLoggable(Level.WARNING)) {
					logger.log(Level.WARNING, "Error while reading from file input stream.", ex);
				}

				return false;
			}
		}

		/**
		 * Reads the next block from the stream and returns it as a chunk.
		 *
		 * <p>The returned chunk is created with this enumeration's internal buffer,
		 * which is overwritten by the next call to this method.
		 * NOTE(review): presumably callers have to consume the chunk content before
		 * requesting the next chunk -- confirm against {@link Chunk}.
		 *
		 * @return The next chunk, or <code>null</code> if the enumeration is closed,
		 *         the end of the stream was reached, or reading failed
		 */
		@Override
		public Chunk nextElement() {
			if (closed) {
				return null;
			}

			try {
				int read = fillBuffer();

				if (read == 0) {
					close();
					return null;
				}

				// A partially filled buffer proves the end of the stream. After a full
				// buffer, available() is used to detect the end, which is reliable for
				// file streams but only an estimate for other stream types.
				boolean lastChunk = read < buffer.length || in.available() == 0;

				if (lastChunk) {
					// close() only logs failures, so the chunk that was already
					// read is not lost if closing the stream fails.
					close();
				}

				// Chunk checksum
				digest.reset();
				digest.update(buffer, 0, read);

				// File checksum (only handed out with the last chunk)
				fileDigest.update(buffer, 0, read);
				byte[] fileChecksum = (lastChunk) ? fileDigest.digest() : null;

				// Create chunk
				return new Chunk(digest.digest(), buffer, read, fileChecksum);
			}
			catch (IOException ex) {
				logger.log(Level.SEVERE, "Error while retrieving next chunk.", ex);
				close();

				return null;
			}
		}

		/**
		 * Closes the underlying stream and marks this enumeration as finished.
		 * Errors while closing are logged and not propagated.
		 */
		@Override
		public void close() {
			closed = true;

			try {
				in.close();
			}
			catch (IOException e) {
				logger.log(Level.INFO, "Error while closing", e);
			}
		}

		/**
		 * Reads from the stream until the buffer is full or the end of the stream
		 * is reached. A single read call may return fewer bytes than requested even
		 * before the end of the stream, which would produce chunks that are not
		 * aligned to the chunk size.
		 *
		 * @return Number of bytes placed in the buffer; zero if the stream is at its end
		 * @throws IOException If reading from the stream fails
		 */
		private int fillBuffer() throws IOException {
			int filled = 0;

			while (filled < buffer.length) {
				int count = in.read(buffer, filled, buffer.length - filled);

				if (count == -1) {
					break;
				}

				filled += count;
			}

			return filled;
		}
	}
}