001/*
002 * Syncany, www.syncany.org
003 * Copyright (C) 2011-2016 Philipp C. Heckel <philipp.heckel@gmail.com>
004 *
005 * This program is free software: you can redistribute it and/or modify
006 * it under the terms of the GNU General Public License as published by
007 * the Free Software Foundation, either version 3 of the License, or
008 * (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013 * GNU General Public License for more details.
014 *
015 * You should have received a copy of the GNU General Public License
016 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
017 */
018package org.syncany.chunk;
019
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.logging.Level;
import java.util.logging.Logger;
027
028/**
029 * The fixed chunker is an implementation of the {@link Chunker}. It implements a simple
030 * fixed-offset chunking, i.e. it breaks files at multiples of the given chunk size
031 * parameter.
032 *
033 * <p>While it is very fast due to its offset-based approach (and not content-based), it
034 * performs very badly when bytes are added or removed from the beginning of a file.
035 *
036 * <p>Details can be found in chapter 3.4 of the thesis at <a href="http://blog.philippheckel.com/2013/05/20/minimizing-remote-storage-usage-and-synchronization-time-using-deduplication-and-multichunking-syncany-as-an-example/3/#Fixed-Size%20Chunking">blog.philippheckel.com</a>.
037 * The <code>FixedChunker</code> implements the chunker described in chapter 3.4.2.
038 *
039 * @author Philipp C. Heckel (philipp.heckel@gmail.com)
040 */
041public class FixedChunker extends Chunker {
042        private static final Logger logger = Logger.getLogger(FixedChunker.class.getSimpleName());
043
044        public static final String DEFAULT_DIGEST_ALG = "SHA1";
045        public static final String TYPE = "fixed";
046
047        private int chunkSize;
048        private String checksumAlgorithm;
049
050        /**
051         * Creates a new fixed offset chunker with the default file/chunk 
052         * checksum algorithm SHA1.
053         * 
054         * @param chunkSize Size of a chunk in bytes
055         */
056        public FixedChunker(int chunkSize) {
057                this(chunkSize, DEFAULT_DIGEST_ALG);
058        }
059
060        /**
061         * Creates a new fixed offset chunker.
062         * 
063         * @param chunkSize Size of a chunk in bytes
064         * @param checksumAlgorithm Algorithm to calculare the chunk and file checksums (e.g. SHA1, MD5)
065         */
066        public FixedChunker(int chunkSize, String checksumAlgorithm) {
067                this.chunkSize = chunkSize;
068                this.checksumAlgorithm = checksumAlgorithm;
069        }
070
071        @Override
072        public ChunkEnumeration createChunks(File file) throws IOException {
073                return new FixedChunkEnumeration(new FileInputStream(file));
074        }
075
076        @Override
077        public String getChecksumAlgorithm() {
078                return checksumAlgorithm;
079        }
080
081        @Override
082        public String toString() {
083                return "Fixed-" + chunkSize + "-" + checksumAlgorithm;
084        }
085
086        public class FixedChunkEnumeration implements ChunkEnumeration {
087                private MessageDigest digest;
088                private MessageDigest fileDigest;
089
090                private InputStream in;
091                private byte[] buffer;
092                private boolean closed;
093
094                public FixedChunkEnumeration(InputStream in) {
095                        this.in = in;
096                        this.buffer = new byte[chunkSize];
097                        this.closed = false;
098
099                        try {
100                                this.digest = MessageDigest.getInstance(checksumAlgorithm);
101                                this.fileDigest = MessageDigest.getInstance(checksumAlgorithm);
102
103                                this.fileDigest.reset();
104                        }
105                        catch (Exception e) {
106                                throw new RuntimeException(e);
107                        }
108                }
109
110                @Override
111                public boolean hasMoreElements() {
112                        if (closed) {
113                                return false;
114                        }
115
116                        try {
117                                return in.available() > 0;
118                        }
119                        catch (IOException ex) {
120                                if (logger.isLoggable(Level.WARNING)) {
121                                        logger.log(Level.WARNING, "Error while reading from file input stream.", ex);
122                                }
123
124                                return false;
125                        }
126                }
127
128                @Override
129                public Chunk nextElement() {
130                        try {
131                                int read = in.read(buffer);
132
133                                if (read == -1) {
134                                        return null;
135                                }
136
137                                // Close if this was the last bytes
138                                if (in.available() == 0) {
139                                        in.close();
140                                        closed = true;
141                                }
142
143                                // Chunk checksum
144                                digest.reset();
145                                digest.update(buffer, 0, read);
146
147                                // File checksum
148                                fileDigest.update(buffer, 0, read);
149                                byte[] fileChecksum = (closed) ? fileDigest.digest() : null;
150
151                                // Create chunk
152                                return new Chunk(digest.digest(), buffer, read, fileChecksum);
153                        }
154                        catch (IOException ex) {
155                                logger.log(Level.SEVERE, "Error while retrieving next chunk.", ex);
156                                return null;
157                        }
158                }
159
160                @Override
161                public void close() {
162                        try {
163                                in.close();
164                        }
165                        catch (IOException e) {
166                                logger.log(Level.INFO, "Error while closing", e);
167                        }
168                }
169        }
170}