1
/* Copyright (C) 2008 PrimeBase Technologies GmbH, Germany
 *
 * PrimeBase Media Stream for MySQL
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Original author: Paul McCullagh
 * Continued development: Barry Leslie
 *
 * Contains all the information about an open database.
 */
31
#ifndef __REPOSITORY_MS_H__
32
#define __REPOSITORY_MS_H__
35
#include "cslib/CSDefs.h"
36
#include "cslib/CSFile.h"
37
#include "cslib/CSMd5.h"
38
#include "engine_ms.h"
42
#define MS_BLOB_HEADER_MAGIC 0x9213BA24
43
#define MS_REPO_FILE_MAGIC 0x5678CDEF
44
#define MS_REPO_FILE_VERSION 3
45
#define MS_REPO_FILE_HEAD_SIZE 128
48
#define MS_REPO_MIN_REF_COUNT 3 // Initial number of references to allow space for:(Table, Delete, Cloud)
49
#define MS_REPO_MIN_MATADATA 0
51
/* NOTE(review): MS_REPO_MIN_REF_COUNT and MS_REPO_MIN_MATADATA are each defined twice in this
 * file with different values (3/0 and 6/128). A preprocessor conditional selecting one pair
 * appears to be missing -- confirm against the upstream header.
 */
#define MS_REPO_MIN_REF_COUNT 6 // Initial number of references to allow space for.
52
#define MS_REPO_MIN_MATADATA 128
55
/* True when storage type `t` is below MS_CLOUD_STORAGE. The argument is parenthesized
 * so that an expression (e.g. `a | b`) is compared as a whole.
 */
#define BLOB_IN_REPOSITORY(t)	((t) < MS_CLOUD_STORAGE)
56
/* True when storage type `t` equals MS_CLOUD_STORAGE. The argument is parenthesized
 * so that an expression (e.g. `a | b`) is compared as a whole.
 */
#define BLOB_IN_CLOUD(t)		((t) == MS_CLOUD_STORAGE)
58
// References are marked as committed or uncommitted as an aid when
// doing a backup, to indicate which references were added after the
// backup started.
61
/* Clears the high bit of a reference id; that bit is used internally to flag uncommitted references. */
#define COMMIT_MASK(id)		((id) & 0x7FFFFFFFFFFFFFFFLL)
62
/* True when the high ("uncommitted") bit of the reference id is clear. */
#define IS_COMMITTED(id)	(!((id) & 0x8000000000000000LL))
63
/* Returns the reference id with the high ("uncommitted") bit set. */
#define UNCOMMITTED(id)		((id) | 0x8000000000000000LL)
68
class CSHTTPOutputStream;
70
/* Repository file structure:
	MSRepoHeadRec:<BLOB_RECORDS>

	BLOB_RECORDS: <BLOB_RECORD> <BLOB_RECORDS>
	BLOB_RECORD: MSBlobHeadRec <BLOB_REFERENCES> BlobData

 * In theory a database can contain repository records created with different versions of PBMS
 * which have different repository header sizes. The reality, though, is that this is not really
 * supported yet. If this is ever supported, the header data will have to be processed
 * after being read from disk before it can be accessed. This will be left until it is actually needed.
 */
85
typedef struct MSRepoHead {
86
CSDiskValue4 rh_magic_4; /* Table magic number. */
87
CSDiskValue2 rh_version_2; /* The header version. */
88
CSDiskValue2 rh_repo_head_size_2; /* The size of this header. */
89
CSDiskValue2 rh_blob_head_size_2; /* The size of this header for each blob sizeof(MSBlobHeadRec). */
90
CSDiskValue2 rh_def_ref_size_2; /* The default size of references. */
91
CSDiskValue8 rh_garbage_count_8;
93
/* NOTE: Keep the next 5 fields together (and in this order)
94
* they are written together in syncHead().
96
CSDiskValue8 rh_recovery_offset_8; /* The last confirmed, flushed offset (start recovery point)! */
97
CSDiskValue4 rh_last_temp_time_4; /* Time of the last temp BLOB in this log. */
98
CSDiskValue4 rh_last_access_4; /* Last access time (in seconds). */
99
CSDiskValue4 rh_create_time_4; /* Last access time (in seconds). */
100
CSDiskValue4 rh_last_ref_4; /* Last reference time (in seconds). */
102
CSDiskValue4 rh_reserved_4;
103
} MSRepoHeadRec, *MSRepoHeadPtr;
105
#define MS_BLOB_ALLOCATED 1 /* The BLOB exists but is scheduled for deletion. */
106
#define MS_BLOB_REFERENCED 2 /* The BLOB exists and is referenced. */
107
#define MS_BLOB_DELETED 3 /* The BLOB has been deleted and can be cleaned up.. */
108
#define MS_BLOB_MOVED 4 /* The BLOB was moved while a backup was in progress and can be cleaned up when the compactor is resumed. */
109
// The only difference between MS_BLOB_DELETED and MS_BLOB_MOVED is that the backup process will backup BLOBs that were moved.
111
/* True when status `s` lies in the range MS_BLOB_ALLOCATED..MS_BLOB_MOVED (inclusive).
 * The argument is parenthesized so expression arguments compare correctly; note that
 * `s` is still evaluated twice, so it must not have side effects.
 */
#define VALID_BLOB_STATUS(s)	((s) >= MS_BLOB_ALLOCATED && (s) <= MS_BLOB_MOVED)
112
/* True when status `s` is MS_BLOB_ALLOCATED or MS_BLOB_REFERENCED (range check, inclusive).
 * The argument is parenthesized so expression arguments compare correctly; note that
 * `s` is still evaluated twice, so it must not have side effects.
 */
#define IN_USE_BLOB_STATUS(s)	((s) >= MS_BLOB_ALLOCATED && (s) <= MS_BLOB_REFERENCED)
114
/* Folds an authorisation code to 16 bits: the low 16 bits XORed with the value shifted right by 16. */
#define MS_SHORT_AUTH_CODE(ac)	((uint16_t)(((ac) & 0x0000FFFF) ^ ((ac) >> 16)))
116
/*
 * BLOB record structure: {
 *	{Blob Header}		(See MSBlobHead below.)
 *	{Blob references}	(An array of rb_ref_count_2 reference records each of size rb_ref_size_1)
 *	{Blob Metadata}		(Null terminated string pairs of the format: <name> <value>)
 *	{The BLOB!}			(Depending on the type of BLOB storage being used this may be the actual blob data or a URL to it.)
 * }
 *
 * The blob alias is a special metadata tag that can be used as a key to access the blob.
 * For this reason it is handled differently in that an index is defined on it.
 */
127
typedef struct MSBlobHead {
129
* Important: rb_last_access_4 and rb_access_count_4 are always updated at the same time
130
* and are assumed to be in this order.
132
CSDiskValue4 rb_last_access_4; /* Last access time (in seconds). */
133
CSDiskValue4 rb_access_count_4; /* The number of times the BLOB has been read. */
134
CSDiskValue4 rb_create_time_4; /* Creation time (in seconds). */
135
CSDiskValue4 rd_magic_4; /* BLOB magic number. */
136
CSDiskValue1 rb_storage_type_1; /* The type of BLOB storage being used. */
138
CSDiskValue2 rb_ref_count_2; /* The number of reference slots in the header. They may not all be used. */
139
CSDiskValue1 rb_ref_size_1; /* The size of references in this header. */
140
CSDiskValue4 rb_mod_time_4; /* Last access modification time (in seconds). */
142
/* The header size may be oversize to allow for the addition of references and metadata before */
143
/* having to relocate the blob. The references array starts at the top of the variable header space */
144
/* and grows down while the metadata starts at the bottom and grows up. If the 2 spaces meet then */
145
/* a new BLOB record must be allocated and the entire BLOB relocated. :( */
147
CSDiskValue2 rb_head_size_2; /* The size of the entire header. (The offset from the start of the header to the BLOB data.)*/
148
CSDiskValue6 rb_blob_repo_size_6; /* The size of the blob data sotred in the repository. For repository BLOBs this is the same as rb_blob_data_size_6 */
149
CSDiskValue6 rb_blob_data_size_6; /* The size of the actual blob. */
150
Md5Digest rb_blob_checksum_md5d; /* The MD5 digest of the blob. */
152
CSDiskValue4 rb_alias_hash_4; /* The alias name hash value.*/
153
CSDiskValue2 rb_alias_offset_2; /* The offset from the start of the header to the BLOB metadata alias value if it exists.*/
154
CSDiskValue2 rb_mdata_offset_2; /* The offset from the start of the header to the BLOB metadata.*/
155
CSDiskValue2 rb_mdata_size_2; /* The size of the metadata.*/
158
* The rb_s3_key_id_4 field is used to generate a database wide
159
* unique persistent id for the BLOB that can be used as
162
* This is done by combining the rb_s3_key_id_4 with the rb_create_time_4.
165
CSDiskValue4 rb_s3_key_id_4;
168
* The rb_s3_cloud_ref_4 field is a reference into the pbms.pbms_cloud
169
* table containing S3 storage information.
171
CSDiskValue4 rb_s3_cloud_ref_4;
173
/* Reserved space to allow for new header fields without
174
* having to change the size of this header.
176
CSDiskValue4 rb_unused[2];
178
/* These are changed when referencing/dereferencing a BLOB: */
179
CSDiskValue1 rb_status_1;
180
CSDiskValue4 rb_backup_id_4; /* Used with the MS_BLOB_MOVED flag to indicate that a moved BLOB should be backed up. */
181
CSDiskValue4 rb_last_ref_4; /* Last reference time (in seconds). */
182
CSDiskValue4 rb_auth_code_4; /* Authorisation code. NOTE! Always last 4 bytes of the
183
* header of the header! */
185
} MSBlobHeadRec, *MSBlobHeadPtr;
186
/* Computes header_size - current_metadata_size - metadata_size. Every argument is
 * parenthesized so that expression arguments (e.g. `a + b`) are subtracted as a whole.
 */
#define MS_METADAT_OFFSET(header_size, current_metadata_size, metadata_size) ((header_size) - (current_metadata_size) - (metadata_size))
187
/* Size of the fixed BLOB header: the offset of rb_auth_code_4 (documented as the last 4 bytes of the header) plus those 4 bytes. */
#define MS_MIN_BLOB_HEAD_SIZE ((uint16_t)(offsetof(MSBlobHeadRec, rb_auth_code_4) + 4))
189
/*
 * Unused variable header space of BLOB header `bh`, as an int32_t:
 *   (rb_head_size_2 - MS_MIN_BLOB_HEAD_SIZE) - (rb_ref_count_2 * rb_ref_size_1) - rb_mdata_size_2
 * `bh` is parenthesized so that any pointer expression can be passed; it is
 * evaluated several times and must not have side effects.
 */
#define MS_VAR_SPACE(bh) ((int32_t)((CS_GET_DISK_2((bh)->rb_head_size_2) - MS_MIN_BLOB_HEAD_SIZE) \
							- (CS_GET_DISK_2((bh)->rb_ref_count_2) * CS_GET_DISK_1((bh)->rb_ref_size_1)) \
							- CS_GET_DISK_2((bh)->rb_mdata_size_2)))
190
/* True when the free variable space of header `bh` can hold `n` more references of
 * size rb_ref_size_1. Both arguments are parenthesized so that `n` may be an expression.
 */
#define MS_CAN_ADD_REFS(bh, n) (MS_VAR_SPACE(bh) >= (int32_t)((n) * CS_GET_DISK_1((bh)->rb_ref_size_1)))
191
/* True when the free variable space of header `bh` can hold `l` more bytes of metadata.
 * `l` is parenthesized so the int32_t cast applies to the whole expression.
 */
#define MS_CAN_ADD_MDATA(bh, l) (MS_VAR_SPACE(bh) >= (int32_t)(l))
194
#define MS_BLOB_STAT_OFFS offsetof(MSBlobHeadRec, rb_status_1)
195
#define MS_BLOB_META_OFFS offsetof(MSBlobHeadRec, rb_alias_offset_2)
197
#define MS_BLOB_FREE_REF 0x0000 /* A free reference */
198
#define MS_BLOB_TABLE_REF 0xFFFF /* A table reference */
199
#define MS_BLOB_DELETE_REF 0xFFFE /* A templog deletion reference */
201
#define INVALID_INDEX 0xFFFF
203
// This is a generic reference structure that is
204
// compatable with MSRepoTableRef, MSRepoTempRef, and MSRepoBlobRef
205
typedef struct MSRepoGenericRef {
206
CSDiskValue2 rr_type_2;
207
CSDiskValue2 rr_reserved_2;
208
uint8_t er_unused[8];
209
} MSRepoGenericRefRec, *MSRepoGenericRefPtr;
211
// Notes on references stored in the BLOB's repository header:
213
// For every table that has a reference to the BLOB there is
214
// 1 table ref (MSRepoTableRefRec) in the BLOB's header.
215
// For every reference to the BLOB from within the database tables
216
// there is 1 BLOB ref (MSRepoBlobRefRec) in the BLOB's header.
217
// The BLOB ref points to the BLOB's table ref in the header.
219
// If the same BLOB is referenced more than once from the same table
220
// there will only be one MSRepoTableRefRec for all the references but
221
// each reference will have its own MSRepoBlobRefRec.
224
// In addition there may be 1 or more temp log references used for
225
// performing delayed offline actions on the BLOB such as deleting
226
// it or moving it to a cloud.
228
// (BLOB aliases should be implemented as another type of reference.)
230
/* Points to a reference to the blob from a table. */
231
typedef struct MSRepoTableRef {
232
CSDiskValue2 rr_type_2; /* MS_BLOB_TABLE_REF */
233
CSDiskValue4 tr_table_id_4; /* Table ID (non-zero if valid). */
234
CSDiskValue6 tr_blob_id_6; /* Blob ID (non-zero if valid). (offset into the table refernce log.)*/
235
} MSRepoTableRefRec, *MSRepoTableRefPtr;
237
/* Points to a reference to the blob from a temp log. */
238
typedef struct MSRepoTempRef {
239
CSDiskValue2 rr_type_2; /* MS_BLOB_DELETE_REF */
240
CSDiskValue2 tp_del_ref_2; /* The index of reference to be removed. Index is 1 based.
241
* If set to INVALID_INDEX then this record is not related to a table reference. */
242
CSDiskValue4 tp_log_id_4; /* Temp log id. */
243
CSDiskValue4 tp_offset_4; /* Offset if temp log. */
244
} MSRepoTempRefRec, *MSRepoTempRefPtr;
247
// A blob reference is a backward reference from the BLOB
// back up into the table referencing it.
//
// Historically it could have been used to access
// the referencing row via an engine callback. This is no longer supported.
// It is now used to store a unique ID for the BLOB reference. This is used
// to avoid possible multiple BLOB decrement or increment operations during
// recovery. They could also be used to locate the record referencing the BLOB.
//
// There is a 1:1 relationship between the number of blob references in
// a BLOB's header and the number of times that BLOB exists in tables in the
// database.
260
typedef struct MSRepoBlobRef {
261
CSDiskValue2 er_table_2; /* Index of the table reference (a MS_BLOB_TABLE_REF record) Index is 1 based. Can be -1 */
262
CSDiskValue2 er_col_index_2; /* The column index of the BLOB. */
263
CSDiskValue8 er_blob_ref_id_8; /* The unique ID of the BLOB reference.*/
264
} MSRepoBlobRefRec, *MSRepoBlobRefPtr;
266
typedef union MSRepoPointers {
269
MSBlobHeadPtr rp_head;
270
MSRepoGenericRefPtr rp_ref;
271
MSRepoTableRefPtr rp_tab_ref;
272
MSRepoTempRefPtr rp_temp_ref;
273
MSRepoBlobRefPtr rp_blob_ref;
274
} MSRepoPointersRec, *MSRepoPointersPtr;
276
#define MS_BLOB_KEY_SIZE 17
278
class MSRepoFile : public CSFile, public CSPooled {
280
MSRepository *myRepo;
282
MSRepoFile *nextFile; /* Next file available in the pool */
285
virtual ~MSRepoFile();
287
uint64_t readBlobChunk(PBMSBlobIDPtr blob_id, uint64_t rep_offset, uint64_t blob_offset, uint64_t buffer_size, char *buffer);
288
void writeBlobChunk(PBMSBlobIDPtr blob_id, uint64_t rep_offset, uint64_t blob_offset, uint64_t data_size, char *data);
289
//void sendBlob(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint64_t size, CSHTTPOutputStream *stream);
290
void sendBlob(MSOpenTable *otab, uint64_t offset, uint64_t req_offset, uint64_t req_size, uint32_t auth_code, bool with_auth_code, bool info_only, CSHTTPOutputStream *stream);
291
void referenceBlob(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint64_t blob_ref_id, uint32_t auth_code, uint16_t col_index);
292
void setBlobMetaData(MSOpenTable *otab, uint64_t offset, const char *meta_data, uint16_t meta_data_len, bool reset_alias, const char *alias);
293
void releaseBlob(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint64_t blob_ref_id, uint32_t auth_code);
294
void commitBlob(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint64_t blob_ref_id, uint32_t auth_code);
296
bool getBlobRefSpace(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint64_t blob_ref_id,
297
uint32_t auth_code, MSRepoTableRefPtr *tab_ref, MSRepoGenericRefPtr *free_ref, uint16_t *tab_ref_cnt, uint64_t *blob_size);
298
void realFreeBlob(MSOpenTable *otab, char *buffer, uint32_t auth_code, uint64_t offset, uint16_t head_size, uint64_t blob_size, size_t ref_size);
300
void freeTableReference(MSOpenTable *otab, uint64_t offset, uint16_t head_size, uint32_t tab_id, uint64_t blob_id, uint32_t auth_code);
301
void checkBlob(CSStringBuffer *buffer, uint64_t offset, uint32_t auth_code, uint32_t temp_log_id, uint32_t temp_log_offset);
303
void updateAccess(MSBlobHeadPtr blob, uint64_t rep_offset);
304
virtual void returnToPool();
306
virtual CSObject *getNextLink() { return iNextLink; }
307
virtual CSObject *getPrevLink() { return iPrevLink; }
308
virtual void setNextLink(CSObject *link) { iNextLink = link; }
309
virtual void setPrevLink(CSObject *link) { iPrevLink = link; }
311
friend class MSRepository;
318
void update_blob_header(MSOpenTable *otab, uint64_t offset, uint64_t blob_size, uint16_t head_size, uint16_t new_head_size);
319
void removeBlob(MSOpenTable *otab, uint32_t tab_id, uint64_t blob_id, uint64_t offset, uint32_t auth_code);
320
static MSRepoFile *newRepoFile(MSRepository *repo, CSPath *path);
322
void updateGarbage(uint64_t size);
325
/*
 * Fills `key` from the BLOB header `blob`:
 *   creation_time <- rb_create_time_4
 *   ref_index     <- rb_s3_key_id_4
 *   cloud_ref     <- rb_s3_cloud_ref_4
 * Both pointers are dereferenced unconditionally and must be valid.
 */
static void getBlobKey(MSBlobHeadPtr blob, CloudKeyPtr key)
{
	key->creation_time = CS_GET_DISK_4(blob->rb_create_time_4);
	key->ref_index = CS_GET_DISK_4(blob->rb_s3_key_id_4);
	key->cloud_ref = CS_GET_DISK_4(blob->rb_s3_cloud_ref_4);
}
334
#define CS_REPO_REC_LOCK_COUNT 31
336
/*
 * Lock states of a repository. The values are distinct bits so that they can
 * be combined in a bit mask (see MSRepository::myRepoLockState).
 */
typedef enum RepoLockStates {	// These states are actually bit masks
	REPO_UNLOCKED	= 0,		// Repository is not locked by anyone.
	REPO_COMPACTING	= 1,		// Repository is locked by the compactor thread.
	REPO_WRITE		= 2,		// Repository is locked for writing a new BLOB to it.
	REPO_BACKUP		= 4			// Repository is locked for backup.
} RepoLockState;
343
// The REPO_COMPACTING and REPO_WRITE states are mutually exclusive but REPO_BACKUP is not.
//
// It is possible that when a repository is scheduled for backup it is already locked by the compactor thread
// or it is locked because a new BLOB is being written to it. In the cases where it is locked by the compactor,
// the compactor is suspended until the repository is backed up. In the case where a BLOB is being written
// to it, both threads are allowed access to it and the resetting of the lock state is handled in returnToPool().
// It is safe to allow the backup thread to access the repository at the same time as other threads because
// backup is a read only operation.
352
class MSRepository : public CSSharedRefObject, public CSPooled {
355
off64_t myRepoFileSize;
356
uint32_t myRepoLockState; // Bit mask of RepoLockStates
357
bool isRemovingFP; /* Set to true if the file pool is being removed. */
358
CSMutex myRepoLock[CS_REPO_REC_LOCK_COUNT];
359
CSMutex myRepoWriteLock; // Writing requires it's own lock.
360
MSDatabase *myRepoDatabase;
361
off64_t myGarbageCount;
362
size_t myRepoHeadSize;
363
int myRepoDefRefSize;
364
size_t myRepoBlobHeadSize;
366
off64_t myRecoveryOffset; /* The starting point for the next recovery. */
367
time_t myLastTempTime;
368
time_t myLastAccessTime;
369
time_t myLastCreateTime;
370
time_t myLastRefTime;
372
bool mustBeDeleted; /* Set to true if the repository should be deleted when freed. */
374
MSRepository(uint32_t id, MSDatabase *db, off64_t file_size);
377
/* TODO: Check recovery after crash after each phase below. */
378
void openRepoFileForWriting(MSOpenTable *otab);
379
uint64_t receiveBlob(MSOpenTable *otab, uint16_t head_size, uint64_t blob_size, Md5Digest *checksum = NULL, CSInputStream *stream = NULL);
380
uint64_t copyBlob(MSOpenTable *otab, uint64_t size, CSInputStream *stream); // Makes a copy of the complete BLOB with header.
381
void writeBlobHead(MSOpenTable *otab, uint64_t offset, uint8_t ref_size, uint16_t head_size, uint64_t size, Md5Digest *checksum, char *metadata, uint16_t metadata_size, uint64_t blob_id, uint32_t auth_code, uint32_t log_id, uint32_t log_offset, uint8_t blob_type, CloudKeyPtr cloud_key);
382
//void writeBlobHead(MSOpenTable *otab, uint64_t offset, uint32_t access_time, uint32_t create_time, uint8_t ref_size, uint16_t head_size, uint64_t blob_size, Md5Digest *checksum, uint16_t metadata_size, uint64_t blob_id, uint32_t auth_code, uint16_t col_index, PBMSEngineRefPtr eng_ref);
383
void setRepoFileSize(MSOpenTable *otab, off64_t offset);
384
void syncHead(MSRepoFile *fh);
385
MSRepoFile *openRepoFile();
387
virtual void returnToPool();
389
MSRepoFile *getRepoFile();
390
void addRepoFile(MSRepoFile *file);
391
void removeRepoFile(MSRepoFile *file);
392
void returnRepoFile(MSRepoFile *file);
394
bool removeRepoFilesNotInUse(); /* Return true if all files have been removed. */
396
/*
 * Default size of a new BLOB header: the fixed BLOB header size, plus the
 * metadata space (metadata_size, or MS_REPO_MIN_MATADATA when metadata_size
 * is 0), plus room for MS_REPO_MIN_REF_COUNT references of the default size.
 * The sum is narrowed to uint16_t on return.
 */
uint16_t getDefaultHeaderSize(uint16_t metadata_size)
{
	const size_t mdata_space = metadata_size ? metadata_size : MS_REPO_MIN_MATADATA;
	const size_t ref_space = myRepoDefRefSize * MS_REPO_MIN_REF_COUNT;

	return (uint16_t)(myRepoBlobHeadSize + mdata_space + ref_space);
}
397
off64_t getRepoFileSize();
398
size_t getRepoHeadSize();
399
size_t getRepoBlobHeadSize();
400
CSMutex *getRepoLock(off64_t offset);
401
uint32_t getRepoID();
402
uint32_t getGarbageLevel();
404
uint32_t initBackup();
405
bool lockedForBackup();
406
void backupCompleted();
407
/* Returns the current value of myRepoXLock. NOTE(review): myRepoXLock is not declared in the
 * visible part of this class -- confirm it exists and how it relates to myRepoLockState. */
bool isRepoLocked() { return myRepoXLock;}
408
void lockRepo(RepoLockState state);
409
void unlockRepo(RepoLockState state);
411
friend class MSRepoFile;
415
/* The read file pool: */
416
MSRepoFile *iFilePool; /* A list of files currently not in use. THIS LIST DOESN'T COUNT AS A REFERENCE! YUK!!*/
417
CSLinkedList iPoolFiles; /* A list of all files in this pool */
419
CSPath *getRepoFilePath();
420
void signalCompactor();