/*
 * Syncany, www.syncany.org
 * Copyright (C) 2011-2016 Philipp C. Heckel <philipp.heckel@gmail.com>
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.syncany.chunk;

import java.io.IOException;
import java.io.InputStream;

import org.syncany.database.MultiChunkEntry.MultiChunkId;

/**
 * Container format that stores one to many {@link Chunk}s. Multichunks are created by a
 * {@link MultiChunker} during the chunking/deduplication process.
 *
 * <p>A multichunk is handled in one of two modes:
 *
 * <ul>
 *  <li><i>Write mode:</i> While a new multichunk is filled with chunks, the {@link Deduper}
 *      calls {@link #write(Chunk) write()} once per chunk and checks {@link #isFull()} to
 *      decide when the multichunk's minimum size has been reached. Once it has, the
 *      multichunk is closed.
 *
 *  <li><i>Read mode:</i> A multichunk that is read from a file or an input stream can be
 *      processed sequentially via {@link #read()} (not used in current code!), or in a
 *      random order via {@link #getChunkInputStream(byte[]) getChunkInputStream()}.
 *      Because of the latter method, <b>it is essential that random read access on a
 *      multichunk is possible</b>.
 * </ul>
 *
 * @author Philipp C. Heckel (philipp.heckel@gmail.com)
 */
public abstract class MultiChunk {
    /** Identifier of this multichunk; <code>null</code> until known (see {@link #setId(MultiChunkId)}). */
    protected MultiChunkId id;

    /** Running size counter; implementations increase it in {@link #write(Chunk)}. */
    protected long size;

    /*
     * NOTE(review): this field is declared as "in KB", but isFull() compares it directly
     * against 'size'. Confirm that implementations maintain 'size' in the same unit.
     */
    protected int minSize; // in KB

    /**
     * Creates a new multichunk with a known identifier.
     *
     * <p>Use this constructor if the calling method knows the multichunk identifier, which
     * is typically the case when a new multichunk is written.
     *
     * @param id Unique multichunk identifier (can be randomly chosen)
     * @param minSize Minimum multichunk size, used to determine if chunks can still be added
     */
    public MultiChunk(MultiChunkId id, int minSize) {
        this.size = 0L;
        this.minSize = minSize;
        this.id = id;
    }

    /**
     * Creates a new multichunk without an identifier (the identifier is set to <code>null</code>).
     *
     * <p>Use this constructor if the calling method does <i>not</i> know the multichunk
     * identifier, which is typically the case when a multichunk is read from a file.
     *
     * @param minSize Minimum multichunk size, used to determine if chunks can still be added
     */
    public MultiChunk(int minSize) {
        this(null, minSize);
    }

    /**
     * Writes a {@link Chunk} to this multichunk (write mode).
     *
     * <p>Implementations must increase the {@link #size} by the amount written to the
     * multichunk (the input size is sufficient) and, if the format requires one, make sure
     * that a header is written for the first chunk.
     *
     * <p>Implementations do not have to check whether or not the multichunk is full. That
     * check belongs outside the multichunker/multichunk, as part of the deduplication
     * algorithm in the {@link Deduper}.
     *
     * @param chunk Chunk to be written to the multichunk container
     * @throws IOException If an exception occurs when writing to the multichunk
     */
    public abstract void write(Chunk chunk) throws IOException;

    /**
     * Reads the next {@link Chunk} from this multichunk <b>sequentially</b> (read mode).
     * Chunks are returned one per call until none are left, at which point the method
     * returns <code>null</code>.
     *
     * <p>For random read access on a multichunk, use
     * {@link #getChunkInputStream(byte[]) getChunkInputStream()} instead.
     *
     * @return Returns the next chunk in the opened multichunk, or <code>null</code> if no chunk is available (anymore)
     * @throws IOException If an exception occurs when reading from the multichunk
     */
    // TODO [low] Method is only used by tests, not necessary anymore? Required for 'cleanup'?
    public abstract Chunk read() throws IOException;

    /**
     * Reads a {@link Chunk} in <b>random access mode</b> (read mode), using the chunk's
     * checksum as identifier. If the chunk is found, its data is returned as an input
     * stream; otherwise the method returns <code>null</code>.
     *
     * <p>To read all chunks of a multichunk sequentially, use {@link #read()} instead.
     *
     * @param checksum The checksum identifying a chunk instance
     * @return Returns a chunk input stream (chunk data) if the chunk can be found in the multichunk, or <code>null</code> otherwise
     * @throws IOException If an exception occurs when reading from the multichunk
     */
    // TODO [low] Method should be named 'read(checksum)' and return a Chunk object, not an input stream, right?!
    public abstract InputStream getChunkInputStream(byte[] checksum) throws IOException;

    /**
     * Closes this multichunk after writing/reading.
     *
     * <p>Implementations should close the underlying input/output stream (depending on
     * whether the multichunk was opened in read or write mode).
     *
     * @throws IOException If an exception occurs when closing the multichunk
     */
    public abstract void close() throws IOException;

    /**
     * Determines the fill state of this multichunk (write mode), i.e. whether the
     * {@link #size} counter has reached the configured minimum size. It is used by the
     * {@link Deduper} to decide whether another chunk can still be added.
     *
     * @return Returns <code>true</code> if no more chunks should be added and the multichunk should be closed, <code>false</code> otherwise
     */
    public boolean isFull() {
        boolean minimumReached = minSize <= size;
        return minimumReached;
    }

    /**
     * Returns the current value of the {@link #size} counter.
     *
     * @return Current size of this multichunk as tracked by the implementation
     */
    public long getSize() {
        return size;
    }

    /**
     * Returns the identifier of this multichunk.
     *
     * @return The multichunk identifier, or <code>null</code> if none has been set
     */
    public MultiChunkId getId() {
        return id;
    }

    /**
     * Sets the identifier of this multichunk.
     *
     * @param id The new multichunk identifier
     */
    public void setId(MultiChunkId id) {
        this.id = id;
    }

    /**
     * Computes a hash code from the identifier, the minimum size and the current size.
     *
     * <p>All three fields can change after construction, so the hash code of an instance
     * is not stable while it is being modified.
     */
    @Override
    public int hashCode() {
        final int factor = 31;

        int idHash = (id != null) ? id.hashCode() : 0;
        int sizeHash = (int) (size ^ (size >>> 32)); // fold the 64-bit counter into 32 bits

        int hash = factor + idHash;
        hash = factor * hash + minSize;
        hash = factor * hash + sizeHash;

        return hash;
    }

    /**
     * Compares this multichunk to another object. Two multichunks are equal if their
     * identifiers are equal (or both <code>null</code>) and their minimum size and current
     * size match.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }

        // instanceof is false for null, so this also covers the null case
        if (!(obj instanceof MultiChunk)) {
            return false;
        }

        MultiChunk that = (MultiChunk) obj;
        boolean sameId = (id == null) ? (that.id == null) : id.equals(that.id);

        return sameId && minSize == that.minSize && size == that.size;
    }
}