001/*
002 * Syncany, www.syncany.org
003 * Copyright (C) 2011-2016 Philipp C. Heckel <philipp.heckel@gmail.com>
004 *
005 * This program is free software: you can redistribute it and/or modify
006 * it under the terms of the GNU General Public License as published by
007 * the Free Software Foundation, either version 3 of the License, or
008 * (at your option) any later version.
009 *
010 * This program is distributed in the hope that it will be useful,
011 * but WITHOUT ANY WARRANTY; without even the implied warranty of
012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
013 * GNU General Public License for more details.
014 *
015 * You should have received a copy of the GNU General Public License
016 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
017 */
018package org.syncany.chunk;
019
020import java.io.IOException;
021import java.io.InputStream;
022
023import org.syncany.database.MultiChunkEntry.MultiChunkId;
024
025/**
026 * A multichunk represents the container format that stores one to many {@link Chunk}s.
027 * Multichunks are created during the chunking/deduplication process by a {@link MultiChunker}.
028 *
029 * <p>There are two modes to handle multichunks:
030 *
031 * <ul>
032 *  <li>When a new multichunk is <i>written</i> and filled up with chunks, the {@link Deduper} makes sure that
033 *      chunks are only added until a multichunk's minimum size has been reached, and closes the
034 *      multichunk afterwards. During that process, the {@link #write(Chunk) write()} method is called
035 *      for each chunk, and {@link #isFull()} is checked for the size.
036 *
037 *  <li>When a multichunk is <i>read</i> from a file or an input stream, it can be processed sequentially using
038 *      the {@link #read()} method (not used in current code!), or in a random order using the
039 *      {@link #getChunkInputStream(byte[]) getChunkInputStream()} method. Because of the latter method,
040 *      <b>it is essential that random read access on a multichunk is possible</b>.
041 * </ul>
042 *
043 * @author Philipp C. Heckel (philipp.heckel@gmail.com)
044 */
045public abstract class MultiChunk {
046        protected MultiChunkId id;
047        protected long size;
048        protected int minSize; // in KB
049
050        /**
051         * Creates a new multichunk.
052         *
053         * <p>This method should be used if the multichunk identifier is known to the
054         * calling method. This is typically the case if a new multichunk is written.
055         *
056         * @param id Unique multichunk identifier (can be randomly chosen)
057         * @param minSize Minimum multichunk size, used to determine if chunks can still be added
058         */
059        public MultiChunk(MultiChunkId id, int minSize) {
060                this.id = id;
061                this.minSize = minSize;
062                this.size = 0;
063        }
064
065        /**
066         * Creates a new multichunk.
067         *
068         * <p>This method should be used if the multichunk identifier is <i>not</i> known to the
069         * calling method. This is typically the case if a multichunk is read from a file.
070         *
071         * @param minSize Minimum multichunk size, used to determine if chunks can still be added
072         */
073        public MultiChunk(int minSize) {
074                this(null, minSize);
075        }
076
077        /**
078         * In write mode, this method can be used to write {@link Chunk}s to a multichunk.
079         *
080         * <p>Implementations must increase the {@link #size} by the amount written to the multichunk
081         * (input size sufficient) and make sure that (if required) a header is written for the first
082         * chunk.
083         *
084         * <p>Implementations do not have to check whether or not a multichunk is full. This should be
085         * done outside the multichunker/multichunk as part of the deduplication algorithm in the {@link Deduper}.
086         *
087         * @param chunk Chunk to be written to the multichunk container
088         * @throws IOException If an exception occurs when writing to the multichunk
089         */
090        public abstract void write(Chunk chunk) throws IOException;
091
092        /**
093         * In read mode, this method can be used to <b>sequentially</b> read {@link Chunk}s from a multichunk.
094         * The method returns a chunk until no more chunks are available, at which point it will return
095         * <code>null</code>.
096         *
097         * <p>If random read access on a multichunk is desired, the
098         * {@link #getChunkInputStream(byte[]) getChunkInputStream()} method should be used instead.
099         *
100         * @return Returns the next chunk in the opened multichunk, or <code>null</code> if no chunk is available (anymore)
101         * @throws IOException If an exception occurs when reading from the multichunk
102         */
103        // TODO [low] Method is only used by tests, not necessary anymore? Required for 'cleanup'?
104        public abstract Chunk read() throws IOException;
105
106        /**
107         * In read mode, this method can be used to read {@link Chunk}s in <b>random access mode</b>, using a chunk
108         * checksum as identifier. The method returns a chunk input stream (the chunk's data) if the chunk is
109         * found, and <code>null</code> otherwise.
110         *
111         * <p>If all chunks are read from a multichunk sequentially, the {@link #read()} method should be used instead.
112         *
113         * @param checksum The checksum identifying a chunk instance
114         * @return Returns a chunk input stream (chunk data) if the chunk can be found in the multichunk, or <code>null</code> otherwise
115         * @throws IOException If an exception occurs when reading from the multichunk
116         */
117        // TODO [low] Method should be named 'read(checksum)' and return a Chunk object, not an input stream, right?!
118        public abstract InputStream getChunkInputStream(byte[] checksum) throws IOException;
119
120        /**
121         * Closes a multichunk after writing/reading.
122         *
123         * <p>Implementations should close the underlying input/output stream (depending on
124         * whether the chunk was opened in read or write mode.
125         *
126         * @throws IOException If an exception occurs when closing the multichunk
127         */
128        public abstract void close() throws IOException;
129
130        /**
131         * In write mode, this method determines the fill state of the multichunk and
132         * returns whether or not a new chunk can still be added. It is used by the
133         * {@link Deduper}.
134         *
135         * @return Returns <code>true</code> if no more chunks should be added and the chunk should be closed, <code>false</code> otherwise
136         */
137        public boolean isFull() {
138                return size >= minSize;
139        }
140
141        public long getSize() {
142                return size;
143        }
144
145        public MultiChunkId getId() {
146                return id;
147        }
148
149        public void setId(MultiChunkId id) {
150                this.id = id;
151        }
152
153        @Override
154        public int hashCode() {
155                final int prime = 31;
156                int result = 1;
157                result = prime * result + ((id == null) ? 0 : id.hashCode());
158                result = prime * result + minSize;
159                result = prime * result + (int) (size ^ (size >>> 32));
160                return result;
161        }
162
163        @Override
164        public boolean equals(Object obj) {
165                if (this == obj) {
166                        return true;
167                }
168                if (obj == null) {
169                        return false;
170                }
171                if (!(obj instanceof MultiChunk)) {
172                        return false;
173                }
174                MultiChunk other = (MultiChunk) obj;
175                if (id == null) {
176                        if (other.id != null) {
177                                return false;
178                        }
179                }
180                else if (!id.equals(other.id)) {
181                        return false;
182                }
183                if (minSize != other.minSize) {
184                        return false;
185                }
186                if (size != other.size) {
187                        return false;
188                }
189                return true;
190        }
191}