1
/* Copyright (c) 2008 PrimeBase Technologies GmbH, Germany
3
* PrimeBase Media Stream for MySQL
5
* This program is free software; you can redistribute it and/or modify
6
* it under the terms of the GNU General Public License as published by
7
* the Free Software Foundation; either version 2 of the License, or
8
* (at your option) any later version.
10
* This program is distributed in the hope that it will be useful,
11
* but WITHOUT ANY WARRANTY; without even the implied warranty of
12
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
* GNU General Public License for more details.
15
* You should have received a copy of the GNU General Public License
16
* along with this program; if not, write to the Free Software
17
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
* Original author: Paul McCullagh
20
* Continued development: Barry Leslie
26
* Contains all the information about an open database.
30
#ifndef __REPOSITORY_MS_H__
31
#define __REPOSITORY_MS_H__
36
#include "Engine_ms.h"
40
/* Magic numbers, format version and header size used by repository files. */
#define MS_BLOB_HEADER_MAGIC 0x9213BA24 /* BLOB header magic; presumably the value stored in MSBlobHeadRec.rd_magic_4 -- TODO confirm. */
41
#define MS_REPO_FILE_MAGIC 0x5678CDEF /* Repository file magic; presumably the value stored in MSRepoHeadRec.rh_magic_4 -- TODO confirm. */
42
#define MS_REPO_FILE_VERSION 3 /* Repository file format version; presumably stored in rh_version_2 -- TODO confirm. */
43
#define MS_REPO_FILE_HEAD_SIZE 128 /* NOTE(review): looks like the space reserved for the repository file header -- confirm against the writer. */
46
#define MS_REPO_MIN_REF_COUNT 3 // Initial number of references to allow space for:(Table, Delete, Cloud)
47
#define MS_REPO_MIN_MATADATA 0
49
#define MS_REPO_MIN_REF_COUNT 6 // Initial number of references to allow space for.
50
#define MS_REPO_MIN_MATADATA 128
53
/* True when storage type 't' is below MS_CLOUD_STORAGE. The argument is
 * parenthesized so that expression arguments (e.g. 'x & mask') are compared
 * as a whole instead of being re-associated by operator precedence. */
#define BLOB_IN_REPOSITORY(t) ((t) < MS_CLOUD_STORAGE)
54
/* True when storage type 't' equals MS_CLOUD_STORAGE. The argument is
 * parenthesized so that expression arguments are evaluated as a unit. */
#define BLOB_IN_CLOUD(t) ((t) == MS_CLOUD_STORAGE)
56
// References are marked as committed or uncommitted as an aid when
57
// doing a backup to indicate which references were added after the
59
#define COMMIT_MASK(id) (0x7FFFFFFFFFFFFFFFLL & (id)) // Clears the top bit, which is used internally to flag uncommitted references.
60
#define IS_COMMITTED(id) (!((id) & 0x8000000000000000LL)) // True when the top (uncommitted) bit of the id is clear.
61
#define UNCOMMITTED(id) (0x8000000000000000LL | (id)) // Returns the id with the top (uncommitted) bit set.
66
class CSHTTPOutputStream;
68
/* Repository file structure:
69
MSRepoHeadRec:<BLOB_RECORDS>
71
BLOB_RECORDS: <BLOB_RECORD> <BLOB_RECORDS>
72
BLOB_RECORD: MSBlobHeadRec <BLOB_REFERENCES> BlobData
78
* In theory a database can contain repository records created with different versions of PBMS
79
* which have different repository header sizes. The reality, though, is that this is not really
80
* supported yet. If this is ever supported the header data will have to be processed
81
* after being read from disk before it can be accessed. This will be left until it is actually needed.
83
/* On-disk header record of a repository file; the BLOB records follow it
 * (see the "Repository file structure" description). */
typedef struct MSRepoHead {
84
CSDiskValue4 rh_magic_4; /* Repository file magic number (presumably MS_REPO_FILE_MAGIC -- TODO confirm). */
85
CSDiskValue2 rh_version_2; /* The header version. */
86
CSDiskValue2 rh_repo_head_size_2; /* The size of this header. */
87
CSDiskValue2 rh_blob_head_size_2; /* The size of the fixed header of each blob: sizeof(MSBlobHeadRec). */
88
CSDiskValue2 rh_def_ref_size_2; /* The default size of references. */
89
CSDiskValue8 rh_garbage_count_8; /* Garbage counter for this repository file; units are not visible here -- TODO confirm. */
91
/* NOTE: Keep the next 5 fields together (and in this order)
92
* they are written together in syncHead().
94
CSDiskValue8 rh_recovery_offset_8; /* The last confirmed, flushed offset (start recovery point)! */
95
CSDiskValue4 rh_last_temp_time_4; /* Time of the last temp BLOB in this log. */
96
CSDiskValue4 rh_last_access_4; /* Last access time (in seconds). */
97
CSDiskValue4 rh_create_time_4; /* Create time (in seconds). NOTE(review): this comment previously duplicated "Last access time" -- confirm the exact meaning. */
98
CSDiskValue4 rh_last_ref_4; /* Last reference time (in seconds). */
100
CSDiskValue4 rh_reserved_4; /* Reserved. */
101
} MSRepoHeadRec, *MSRepoHeadPtr;
103
#define MS_BLOB_ALLOCATED 1 /* The BLOB exists but is scheduled for deletion. */
104
#define MS_BLOB_REFERENCED 2 /* The BLOB exists and is referenced. */
105
#define MS_BLOB_DELETED 3 /* The BLOB has been deleted and can be cleaned up.. */
106
#define MS_BLOB_MOVED 4 /* The BLOB was moved while a backup was in progress and can be cleaned up when the compactor is resumed. */
107
// The only difference between MS_BLOB_DELETED and MS_BLOB_MOVED is that the backup process will backup BLOBs that were moved.
109
/* True when 's' is one of the defined BLOB status values
 * (MS_BLOB_ALLOCATED .. MS_BLOB_MOVED). The argument is parenthesized so that
 * expression arguments are compared as a whole. Note that 's' is evaluated
 * twice, so it must not have side effects. */
#define VALID_BLOB_STATUS(s) ((s) >= MS_BLOB_ALLOCATED && (s) <= MS_BLOB_MOVED)
110
/* True when 's' is MS_BLOB_ALLOCATED or MS_BLOB_REFERENCED. The argument is
 * parenthesized so that expression arguments are compared as a whole. Note
 * that 's' is evaluated twice, so it must not have side effects. */
#define IN_USE_BLOB_STATUS(s) ((s) >= MS_BLOB_ALLOCATED && (s) <= MS_BLOB_REFERENCED)
112
/* Folds an authorisation code to 16 bits: the upper half (ac >> 16) XORed
 * with the low 16 bits, truncated to uint16_t. */
#define MS_SHORT_AUTH_CODE(ac) ((uint16_t) (((ac) >> 16) ^ ((ac) & 0x0000FFFF)))
114
* BLOB record structure: {
115
{Blob Header} (See MSBlobHead below.)
116
{Blob references} (An array of rb_ref_count_2 reference records each of size rb_ref_size_1)
117
{Blob Metadata} (Null terminated string pairs of the format: <name> <value>)
118
{The BLOB!} (Depending on the type of BLOB storage being used this may be the actual blob data or a URL to it.)
122
* The blob alias is a special metadata tag that can be used as a key to access the blob.
123
* For this reason it is handled differently in that an index is defined on it.
125
/* Fixed part of the header that starts every BLOB record in a repository
 * file; the variable reference/metadata space and the BLOB data follow it. */
typedef struct MSBlobHead {
127
* Important: rb_last_access_4 and rb_access_count_4 are always updated at the same time
128
* and are assumed to be in this order.
130
CSDiskValue4 rb_last_access_4; /* Last access time (in seconds). */
131
CSDiskValue4 rb_access_count_4; /* The number of times the BLOB has been read. */
132
CSDiskValue4 rb_create_time_4; /* Creation time (in seconds). */
133
CSDiskValue4 rd_magic_4; /* BLOB magic number. (Note the 'rd_' prefix; every other field of this struct uses 'rb_'.) */
134
CSDiskValue1 rb_storage_type_1; /* The type of BLOB storage being used. */
136
CSDiskValue2 rb_ref_count_2; /* The number of reference slots in the header. They may not all be used. */
137
CSDiskValue1 rb_ref_size_1; /* The size of references in this header. */
138
CSDiskValue4 rb_mod_time_4; /* Last access modification time (in seconds). */
140
/* The header size may be oversize to allow for the addition of references and metadata before */
141
/* having to relocate the blob. The references array starts at the top of the variable header space */
142
/* and grows down while the metadata starts at the bottom and grows up. If the 2 spaces meet then */
143
/* a new BLOB record must be allocated and the entire BLOB relocated. :( */
145
CSDiskValue2 rb_head_size_2; /* The size of the entire header. (The offset from the start of the header to the BLOB data.)*/
146
CSDiskValue6 rb_blob_repo_size_6; /* The size of the blob data stored in the repository. For repository BLOBs this is the same as rb_blob_data_size_6 */
147
CSDiskValue6 rb_blob_data_size_6; /* The size of the actual blob. */
148
Md5Digest rb_blob_checksum_md5d; /* The MD5 digest of the blob. */
150
CSDiskValue4 rb_alias_hash_4; /* The alias name hash value.*/
151
CSDiskValue2 rb_alias_offset_2; /* The offset from the start of the header to the BLOB metadata alias value if it exists.*/
152
CSDiskValue2 rb_mdata_offset_2; /* The offset from the start of the header to the BLOB metadata.*/
153
CSDiskValue2 rb_mdata_size_2; /* The size of the metadata.*/
156
* The rb_s3_key_id_4 field is used to generate a database wide
157
* unique persistent id for the BLOB that can be used as
160
* This is done by combining the rb_s3_key_id_4 with the rb_create_time_4.
163
CSDiskValue4 rb_s3_key_id_4;
166
* The rb_s3_cloud_ref_4 field is a reference into the pbms.pbms_cloud
167
* table containing S3 storage information.
169
CSDiskValue4 rb_s3_cloud_ref_4;
171
/* Reserved space to allow for new header fields without
172
* having to change the size of this header.
174
CSDiskValue4 rb_unused[2];
176
/* These are changed when referencing/dereferencing a BLOB: */
177
CSDiskValue1 rb_status_1; /* BLOB status; presumably one of the MS_BLOB_ALLOCATED .. MS_BLOB_MOVED values -- TODO confirm. */
178
CSDiskValue4 rb_backup_id_4; /* Used with the MS_BLOB_MOVED flag to indicate that a moved BLOB should be backed up. */
179
CSDiskValue4 rb_last_ref_4; /* Last reference time (in seconds). */
180
CSDiskValue4 rb_auth_code_4; /* Authorisation code. NOTE! Always the last 4 bytes of the
181
* fixed BLOB header (MS_MIN_BLOB_HEAD_SIZE is computed from this field's offset)! */
183
} MSBlobHeadRec, *MSBlobHeadPtr;
184
/* Computes header_size - current_metadata_size - metadata_size. Every
 * argument is parenthesized so that expression arguments such as 'a + b'
 * are subtracted as a whole rather than being re-associated. */
#define MS_METADAT_OFFSET(header_size, current_metadata_size, metadata_size) ((header_size) - (current_metadata_size) - (metadata_size))
185
/* Size of the fixed BLOB header: the offset of rb_auth_code_4 plus 4,
 * truncated to uint16_t. The '+ 4' presumably is the size of that 4-byte
 * field -- TODO confirm that CSDiskValue4 occupies 4 bytes. */
#define MS_MIN_BLOB_HEAD_SIZE ((uint16_t)(offsetof(MSBlobHeadRec, rb_auth_code_4) + 4))
187
/* Free bytes left in the variable part of a BLOB header: the total header
 * size minus the fixed header, minus the space taken by the reference slots
 * (rb_ref_count_2 * rb_ref_size_1), minus the metadata size.
 * 'bh' is a pointer to the BLOB header; it is parenthesized so that pointer
 * expressions (e.g. 'base + 1') can be passed safely. 'bh' is evaluated
 * several times, so it must not have side effects. */
#define MS_VAR_SPACE(bh) ((CS_GET_DISK_2((bh)->rb_head_size_2) - MS_MIN_BLOB_HEAD_SIZE) -(CS_GET_DISK_2((bh)->rb_ref_count_2) * CS_GET_DISK_1((bh)->rb_ref_size_1)) - CS_GET_DISK_2((bh)->rb_mdata_size_2))
188
/* True when the variable header space of 'bh' can hold 'n' more references
 * of size rb_ref_size_1. Both arguments are parenthesized so that expression
 * arguments (e.g. 'count + 1') are multiplied as a whole. */
#define MS_CAN_ADD_REFS(bh, n) (MS_VAR_SPACE(bh) >= ((n) * CS_GET_DISK_1((bh)->rb_ref_size_1)))
189
/* True when the variable header space of 'bh' can hold 'l' more bytes of
 * metadata. 'l' is parenthesized so that low-precedence expression arguments
 * (e.g. a conditional expression) are compared as a whole. */
#define MS_CAN_ADD_MDATA(bh, l) (MS_VAR_SPACE(bh) >= (l))
192
#define MS_BLOB_STAT_OFFS offsetof(MSBlobHeadRec, rb_storage_type_1)
193
#define MS_BLOB_META_OFFS offsetof(MSBlobHeadRec, rb_alias_offset_2)
195
#define MS_BLOB_FREE_REF 0x0000 /* A free reference */
196
#define MS_BLOB_TABLE_REF 0xFFFF /* A table reference */
197
#define MS_BLOB_DELETE_REF 0xFFFE /* A templog deletion reference */
199
#define INVALID_INDEX 0xFFFF /* Marks an unset reference index (see MSRepoTempRef::tp_del_ref_2). */
201
// This is a generic reference structure that is
202
// compatible with MSRepoTableRef, MSRepoTempRef, and MSRepoBlobRef
203
/* Type-agnostic view of one reference slot in a BLOB header. */
typedef struct MSRepoGenericRef {
204
CSDiskValue2 rr_type_2; /* Reference type tag; same leading field as in MSRepoTableRef and MSRepoTempRef (see the MS_BLOB_*_REF values). */
205
CSDiskValue2 rr_reserved_2; /* Reserved. */
206
uint8_t er_unused[8]; /* Filler; presumably pads this record to the size of the specific reference records -- TODO confirm. */
207
} MSRepoGenericRefRec, *MSRepoGenericRefPtr;
209
// Notes on references stored in the BLOB's repository header:
211
// For every table that has a reference to the BLOB there is
212
// 1 table ref (MSRepoTableRefRec) in the BLOB's header.
213
// For every reference to the BLOB from within the database tables
214
// there is 1 BLOB ref (MSRepoBlobRefRec) in the BLOB's header.
215
// The BLOB ref points to the BLOB's table ref in the header.
217
// If the same BLOB is referenced more than once from the same table
218
// there will only be one MSRepoTableRefRec for all the references but
219
// each reference will have its own MSRepoBlobRefRec.
222
// In addition there may be 1 or more temp log references used for
223
// performing delayed offline actions on the BLOB such as deleting
224
// it or moving it to a cloud.
226
// (BLOB aliases should be implemented as another type of reference.)
228
/* Points to a reference to the blob from a table. */
229
typedef struct MSRepoTableRef {
230
CSDiskValue2 rr_type_2; /* Reference type tag: MS_BLOB_TABLE_REF */
231
CSDiskValue4 tr_table_id_4; /* Table ID (non-zero if valid). */
232
CSDiskValue6 tr_blob_id_6; /* Blob ID (non-zero if valid). (Offset into the table reference log.) */
233
} MSRepoTableRefRec, *MSRepoTableRefPtr;
235
/* Points to a reference to the blob from a temp log. */
236
typedef struct MSRepoTempRef {
237
CSDiskValue2 rr_type_2; /* Reference type tag: MS_BLOB_DELETE_REF */
238
CSDiskValue2 tp_del_ref_2; /* The index of reference to be removed. Index is 1 based.
239
* If set to INVALID_INDEX then this record is not related to a table reference. */
240
CSDiskValue4 tp_log_id_4; /* Temp log id. */
241
CSDiskValue4 tp_offset_4; /* Offset in the temp log. */
242
} MSRepoTempRefRec, *MSRepoTempRefPtr;
245
// A blob reference is a backward reference from the BLOB
246
// back up into the table referencing it.
248
// Historically it could have been used to access
249
// the referencing row via an engine callback. This is no longer supported.
250
// It is now used to store a unique ID for the BLOB reference. This is used
251
// to avoid possible multiple BLOB decrement or increment operations during
252
// recovery. They could also be used to locate the record referencing to the BLOB
255
// There is a 1:1 relationship between the number of blob references in
256
// a BLOB's header and the number of times that BLOB exists in tables in the
258
/* Back reference from the BLOB to one use of it in a table; stores the
 * unique ID of that BLOB reference. */
typedef struct MSRepoBlobRef {
259
CSDiskValue2 er_table_2; /* Index of the table reference (a MS_BLOB_TABLE_REF record) Index is 1 based. Can be -1 */
260
CSDiskValue2 er_col_index_2; /* The column index of the BLOB. */
261
CSDiskValue8 er_blob_ref_id_8; /* The unique ID of the BLOB reference.*/
262
} MSRepoBlobRefRec, *MSRepoBlobRefPtr;
264
/* Union of pointer types that lets a single pointer value be viewed as a
 * BLOB header or as any of the reference record types. */
typedef union MSRepoPointers {
267
MSBlobHeadPtr rp_head; /* View as a BLOB header. */
268
MSRepoGenericRefPtr rp_ref; /* View as a generic reference. */
269
MSRepoTableRefPtr rp_tab_ref; /* View as a table reference. */
270
MSRepoTempRefPtr rp_temp_ref; /* View as a temp log reference. */
271
MSRepoBlobRefPtr rp_blob_ref; /* View as a BLOB (back) reference. */
272
} MSRepoPointersRec, *MSRepoPointersPtr;
274
#define MS_BLOB_KEY_SIZE 17
276
class MSRepoFile : public CSFile, public CSPooled {
278
MSRepository *myRepo;
280
MSRepoFile *nextFile; /* Next file available in the pool */
283
virtual ~MSRepoFile();
285
uint64_t readBlobChunk(PBMSBlobIDPtr blob_id, uint64_t rep_offset, uint64_t blob_offset, uint64_t buffer_size, char *buffer);
286
void writeBlobChunk(PBMSBlobIDPtr blob_id, uint64_t rep_offset, uint64_t blob_offset, uint64_t data_size, char *data);
287
//void sendBlob(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint64_t size, CSHTTPOutputStream *stream);
288
void sendBlob(MSOpenTable *otab, uint64_t offset, uint64_t req_offset, uint64_t req_size, uint32_t auth_code, bool with_auth_code, bool info_only, CSHTTPOutputStream *stream);
289
void retainBlob(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint64_t blob_ref_id, uint32_t auth_code, uint16_t col_index);
290
void referenceBlob(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint64_t blob_ref_id, uint32_t auth_code, uint16_t col_index);
291
void setBlobMetaData(MSOpenTable *otab, uint64_t offset, const char *meta_data, uint16_t meta_data_len, bool reset_alias, const char *alias);
292
void releaseBlob(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint64_t blob_ref_id, uint32_t auth_code);
293
void commitBlob(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint64_t blob_ref_id, uint32_t auth_code);
295
bool getBlobRefSpace(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint64_t blob_ref_id,
296
uint32_t auth_code, MSRepoTableRefPtr *tab_ref, MSRepoGenericRefPtr *free_ref, uint16_t *tab_ref_cnt, uint64_t *blob_size);
297
void realFreeBlob(MSOpenTable *otab, char *buffer, uint32_t auth_code, uint64_t offset, uint16_t head_size, uint64_t blob_size, size_t ref_size);
299
void freeTableReference(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint32_t auth_code);
300
void checkBlob(MSOpenTable *otab, CSStringBuffer *buffer, uint64_t offset, uint32_t auth_code, uint32_t temp_log_id, uint32_t temp_log_offset);
302
void updateAccess(MSBlobHeadPtr blob, uint64_t rep_offset);
303
virtual void returnToPool();
305
/* Linked-list accessors: read and write the iNextLink / iPrevLink members. */
virtual CSObject *getNextLink() { return iNextLink; } /* Returns iNextLink. */
306
virtual CSObject *getPrevLink() { return iPrevLink; } /* Returns iPrevLink. */
307
virtual void setNextLink(CSObject *link) { iNextLink = link; } /* Stores 'link' in iNextLink. */
308
virtual void setPrevLink(CSObject *link) { iPrevLink = link; } /* Stores 'link' in iPrevLink. */
310
friend class MSRepository;
317
void update_blob_header(MSOpenTable *otab, uint64_t offset, uint64_t blob_size, uint16_t head_size, uint16_t new_head_size);
318
void removeBlob(MSOpenTable *otab, uint32_t tab_id, uint64_t blob_id, uint64_t offset, uint32_t auth_code);
319
static MSRepoFile *newRepoFile(MSRepository *repo, CSPath *path);
321
void updateGarbage(uint64_t size);
324
static void getBlobKey(MSBlobHeadPtr blob, CloudKeyPtr key)
326
key->creation_time = CS_GET_DISK_4(blob->rb_create_time_4);
327
key->ref_index = CS_GET_DISK_4(blob->rb_s3_key_id_4);
328
key->cloud_ref = CS_GET_DISK_4(blob->rb_s3_cloud_ref_4);
333
#define CS_REPO_REC_LOCK_COUNT 31 /* Number of mutexes in the MSRepository::myRepoLock array. */
335
typedef enum RepoLockStates { // These states are actually bit masks
336
REPO_UNLOCKED = 0, // Repository is not locked by anyone.
337
REPO_COMPACTING = 1, // Repository is locked by the compactor thread.
338
REPO_WRITE = 2, // Repository is locked for writing a new BLOB to it.
339
REPO_BACKUP = 4 // Repository is locked for backup.
342
// The REPO_COMPACTING and REPO_WRITE states are mutually exclusive but REPO_BACKUP is not.
345
// It is possible that when a repository is scheduled for backup it is already locked by the compactor thread
346
// or it is locked because a new BLOB is being written to it. In the cases where it is locked by the compactor,
347
// the compactor is suspended until the repository is backed up. In the case where a BLOB is being written
348
// to it both threads are allowed access to it and the resetting of the lock state is handled in returnToPool().
349
// It is safe to allow the backup thread to access the repository at the same time as other threads because
350
// backup is a read only operation.
351
class MSRepository : public CSSharedRefObject, public CSPooled {
354
off_t myRepoFileSize;
355
u_int myRepoLockState; // Bit mask of RepoLockStates
356
bool isRemovingFP; /* Set to true if the file pool is being removed. */
357
CSMutex myRepoLock[CS_REPO_REC_LOCK_COUNT];
358
MSDatabase *myRepoDatabase;
359
off_t myGarbageCount;
360
size_t myRepoHeadSize;
361
int myRepoDefRefSize;
362
size_t myRepoBlobHeadSize;
364
off_t myRecoveryOffset; /* The starting point for the next recovery. */
365
time_t myLastTempTime;
366
time_t myLastAccessTime;
367
time_t myLastCreateTime;
368
time_t myLastRefTime;
370
bool mustBeDeleted; /* Set to true if the repository should be deleted when freed. */
372
MSRepository(u_int id, MSDatabase *db, off_t file_size);
375
/* TODO: Check recovery after crash after each phase below. */
376
void openRepoFileForWriting(MSOpenTable *otab);
377
uint64_t receiveBlob(MSOpenTable *otab, uint16_t head_size, uint64_t blob_size, Md5Digest *checksum = NULL, CSInputStream *stream = NULL);
378
uint64_t copyBlob(MSOpenTable *otab, uint64_t size, CSInputStream *stream); // Makes a copy of the complete BLOB with header.
379
void writeBlobHead(MSOpenTable *otab, uint64_t offset, uint8_t ref_size, uint16_t head_size, uint64_t size, Md5Digest *checksum, char *metadata, uint16_t metadata_size, uint64_t blob_id, uint32_t auth_code, uint32_t log_id, uint32_t log_offset, uint8_t blob_type, CloudKeyPtr cloud_key);
380
void resetBlobHead(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint64_t blob_id, uint64_t blob_ref_id, uint32_t auth_code, uint16_t col_index, uint8_t blob_type); // Resets the BLOB header after it has been copied.
381
//void writeBlobHead(MSOpenTable *otab, uint64_t offset, uint32_t access_time, uint32_t create_time, uint8_t ref_size, uint16_t head_size, uint64_t blob_size, Md5Digest *checksum, uint16_t metadata_size, uint64_t blob_id, uint32_t auth_code, uint16_t col_index, PBMSEngineRefPtr eng_ref);
382
void setRepoFileSize(MSOpenTable *otab, off_t offset);
383
void syncHead(MSRepoFile *fh);
384
MSRepoFile *openRepoFile();
386
virtual void returnToPool();
388
MSRepoFile *getRepoFile();
389
void addRepoFile(MSRepoFile *file);
390
void removeRepoFile(MSRepoFile *file);
391
void returnRepoFile(MSRepoFile *file);
393
bool removeRepoFilesNotInUse(); /* Return true if all files have been removed. */
395
/* Default BLOB header size: the repository's BLOB header size, plus metadata
 * space (the requested size, or MS_REPO_MIN_MATADATA when metadata_size is 0),
 * plus room for MS_REPO_MIN_REF_COUNT references of the default size.
 * The sum is implicitly narrowed to uint16_t on return. */
uint16_t getDefaultHeaderSize(uint16_t metadata_size) {
	size_t head_size = myRepoBlobHeadSize;
	head_size += metadata_size ? metadata_size : MS_REPO_MIN_MATADATA;
	head_size += myRepoDefRefSize * MS_REPO_MIN_REF_COUNT;
	return head_size;
}
396
off_t getRepoFileSize();
397
size_t getRepoHeadSize();
398
size_t getRepoBlobHeadSize();
399
CSMutex *getRepoLock(off_t offset);
401
u_int getGarbageLevel();
404
bool lockedForBackup();
405
void backupCompleted();
406
/* Returns the myRepoXLock flag.
 * NOTE(review): myRepoXLock is not declared among the members visible in this
 * part of the class (the visible lock-state member is myRepoLockState) --
 * confirm the member exists and that this is the intended value. */
bool isRepoLocked() { return myRepoXLock;}
407
void lockRepo(RepoLockState state);
408
void unlockRepo(RepoLockState state);
410
friend class MSRepoFile;
414
/* The read file pool: */
415
MSRepoFile *iFilePool; /* A list of files currently not in use. */
416
CSLinkedList iPoolFiles; /* A list of all files in this pool */
418
CSPath *getRepoFilePath();
419
void signalCompactor();
422
static int gGarbageThreshold;