~drizzle-trunk/drizzle/development

1 by brian
clean slate
1
/******************************************************
2
The interface to the operating system
3
process control primitives
4
5
(c) 1995 Innobase Oy
6
7
Created 9/30/1995 Heikki Tuuri
8
*******************************************************/
9
10
#include "os0proc.h"
11
#ifdef UNIV_NONINL
12
#include "os0proc.ic"
13
#endif
14
15
#include "ut0mem.h"
16
#include "ut0byte.h"
17
18
19
/*
20
How to get AWE to compile on Windows?
21
-------------------------------------
22
23
In the project settings of the innobase project the Visual C++ source,
24
__WIN2000__ has to be defined.
25
26
The Visual C++ has to be relatively recent and _WIN32_WINNT has to be
27
defined to a value >= 0x0500 when windows.h is included.
28
29
#define _WIN32_WINNT	0x0500
30
31
Where does AWE work?
32
-------------------
33
34
See the error message in os_awe_allocate_physical_mem().
35
36
How to assign privileges for mysqld to use AWE?
37
-----------------------------------------------
38
39
See the error message in os_awe_enable_lock_pages_in_mem().
40
41
Use Windows AWE functions in this order
42
---------------------------------------
43
44
(1) os_awe_enable_lock_pages_in_mem();
45
(2) os_awe_allocate_physical_mem();
46
(3) os_awe_allocate_virtual_mem_window();
47
(4) os_awe_map_physical_mem_to_window().
48
49
To test 'AWE' in a computer which does not have the AWE API,
50
you can compile with UNIV_SIMULATE_AWE defined in this file.
51
*/
52
53
#ifdef UNIV_SIMULATE_AWE
54
/* If we simulate AWE, we allocate the 'physical memory' here */
55
byte*		os_awe_simulate_mem;	/* 4096-byte aligned buffer set up in os_awe_allocate_physical_mem() */
56
ulint		os_awe_simulate_mem_size;	/* size of os_awe_simulate_mem in bytes */
57
os_awe_t*	os_awe_simulate_page_info;	/* page info array returned to the caller of os_awe_allocate_physical_mem() */
58
byte*		os_awe_simulate_window;	/* 4096-byte aligned buffer set up in os_awe_allocate_virtual_mem_window() */
59
ulint		os_awe_simulate_window_size;	/* size of os_awe_simulate_window in bytes */
60
/* In simulated AWE the following contains a NULL pointer or a pointer
61
to a mapped 'physical page' for each 4 kB page in the AWE window */
62
byte**		os_awe_simulate_map;
63
#endif
64
65
#ifdef __WIN2000__
66
os_awe_t*	os_awe_page_info;	/* page info array filled in by os_awe_allocate_physical_mem() */
67
ulint		os_awe_n_pages;	/* number of slots in os_awe_page_info */
68
byte*		os_awe_window;	/* start of the window reserved in os_awe_allocate_virtual_mem_window() */
69
ulint		os_awe_window_size;	/* size of os_awe_window in bytes */
70
#endif
71
72
/* os_mem_alloc_large() and os_mem_free_large() use large pages only if this
flag and os_large_page_size are both nonzero */
ibool os_use_large_pages;
73
/* Large page size. This may be a boot-time option on some platforms */
74
ulint os_large_page_size;
75
76
/********************************************************************
77
Windows AWE support. Tries to enable the "lock pages in memory" privilege for
78
the current process so that the current process can allocate memory-locked
79
virtual address space to act as the window where AWE maps physical memory. */
80
81
ibool
82
os_awe_enable_lock_pages_in_mem(void)
83
/*=================================*/
84
				/* out: TRUE if success, FALSE if error;
85
				prints error info to stderr if no success */
86
{
87
#ifdef UNIV_SIMULATE_AWE
88
89
	return(TRUE);
90
91
#elif defined(__WIN2000__)
92
	struct {
93
		DWORD			Count;
94
		LUID_AND_ATTRIBUTES	Privilege[1];
95
	}	Info;
96
	HANDLE	hProcess;
97
	HANDLE	Token;
98
	BOOL	Result;
99
100
	hProcess = GetCurrentProcess();
101
102
	/* Open the token of the current process */
103
104
	Result = OpenProcessToken(hProcess,
105
				  TOKEN_ADJUST_PRIVILEGES, &Token);
106
	if (Result != TRUE) {
107
		fprintf(stderr,
108
			"InnoDB: AWE: Cannot open process token, error %lu\n",
109
			(ulint)GetLastError());
110
		return(FALSE);
111
	}
112
113
	Info.Count = 1;
114
115
	Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED;
116
117
	/* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY
118
	privilege */
119
120
	Result = LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME,
121
				      &(Info.Privilege[0].Luid));
122
	if (Result != TRUE) {
123
		fprintf(stderr,
124
			"InnoDB: AWE: Cannot get local privilege"
125
			" value for %s, error %lu.\n",
126
			SE_LOCK_MEMORY_NAME, (ulint)GetLastError());
127
128
		return(FALSE);
129
	}
130
131
	/* Try to adjust the privilege */
132
133
	Result = AdjustTokenPrivileges(Token, FALSE,
134
				       (PTOKEN_PRIVILEGES)&Info,
135
				       0, NULL, NULL);
136
	/* Check the result */
137
138
	if (Result != TRUE) {
139
		fprintf(stderr,
140
			"InnoDB: AWE: Cannot adjust process token privileges,"
141
			" error %u.\n",
142
			GetLastError());
143
		return(FALSE);
144
	} else if (GetLastError() != ERROR_SUCCESS) {
145
		fprintf(stderr,
146
			"InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege,"
147
			" error %lu.\n"
148
			"InnoDB: In Windows XP Home you cannot use AWE."
149
			" In Windows 2000 and XP\n"
150
			"InnoDB: Professional you must go to the"
151
			" Control Panel, to\n"
152
			"InnoDB: Security Settings, to Local Policies,"
153
			" and enable\n"
154
			"InnoDB: the 'lock pages in memory' privilege"
155
			" for the user who runs\n"
156
			"InnoDB: the MySQL server.\n", GetLastError());
157
158
		return(FALSE);
159
	}
160
161
	CloseHandle(Token);
162
163
	return(TRUE);
164
#else
165
#ifdef __WIN__
166
	fprintf(stderr,
167
		"InnoDB: AWE: Error: to use AWE you must use"
168
		" a ...-nt MySQL executable.\n");
169
#endif
170
	return(FALSE);
171
#endif
172
}
173
174
/********************************************************************
175
Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86
176
processor. */
177
178
ibool
179
os_awe_allocate_physical_mem(
180
/*=========================*/
181
				/* out: TRUE if success */
182
	os_awe_t** page_info,	/* out, own: array of opaque data containing
183
				the info for allocated physical memory pages;
184
				each allocated 4 kB physical memory page has
185
				one slot of type os_awe_t in the array */
186
	ulint	  n_megabytes)	/* in: number of megabytes to allocate */
187
{
188
#ifdef UNIV_SIMULATE_AWE
189
	os_awe_simulate_page_info = ut_malloc
190
		(sizeof(os_awe_t) * n_megabytes
191
		 * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE));
192
193
	os_awe_simulate_mem
194
		= ut_align(ut_malloc(4096 + 1024 * 1024 * n_megabytes), 4096);
195
	os_awe_simulate_mem_size = n_megabytes * 1024 * 1024;
196
197
	*page_info = os_awe_simulate_page_info;
198
199
	return(TRUE);
200
201
#elif defined(__WIN2000__)
202
	BOOL		bResult;
203
	os_awe_t	NumberOfPages;		/* Question: why does Windows
204
						use the name ULONG_PTR for
205
						a scalar integer type? Maybe
206
						because we may also refer to
207
						&NumberOfPages? */
208
	os_awe_t	NumberOfPagesInitial;
209
	SYSTEM_INFO	sSysInfo;
210
	int		PFNArraySize;
211
212
	if (n_megabytes > 64 * 1024) {
213
214
		fprintf(stderr,
215
			"InnoDB: AWE: Error: tried to allocate %lu MB.\n"
216
			"InnoDB: AWE cannot allocate more than"
217
			" 64 GB in any computer.\n", n_megabytes);
218
219
		return(FALSE);
220
	}
221
222
	GetSystemInfo(&sSysInfo);  /* fill the system information structure */
223
224
	if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) {
225
		fprintf(stderr,
226
			"InnoDB: AWE: Error: this computer has a page size"
227
			" of %lu.\n"
228
			"InnoDB: Should be 4096 bytes for"
229
			" InnoDB AWE support to work.\n",
230
			(ulint)sSysInfo.dwPageSize);
231
232
		return(FALSE);
233
	}
234
235
	/* Calculate the number of pages of memory to request */
236
237
	NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE);
238
239
	/* Calculate the size of page_info for allocated physical pages */
240
241
	PFNArraySize = NumberOfPages * sizeof(os_awe_t);
242
243
	*page_info = (os_awe_t*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize);
244
245
	if (*page_info == NULL) {
246
		fprintf(stderr,
247
			"InnoDB: AWE: Failed to allocate page info"
248
			" array from process heap, error %lu\n",
249
			(ulint)GetLastError());
250
251
		return(FALSE);
252
	}
253
254
	ut_total_allocated_memory += PFNArraySize;
255
256
	/* Enable this process' privilege to lock pages to physical memory */
257
258
	if (!os_awe_enable_lock_pages_in_mem()) {
259
260
		return(FALSE);
261
	}
262
263
	/* Allocate the physical memory */
264
265
	NumberOfPagesInitial = NumberOfPages;
266
267
	os_awe_page_info = *page_info;
268
	os_awe_n_pages = (ulint)NumberOfPages;
269
270
	/* Compilation note: if the compiler complains the function is not
271
	defined, see the note at the start of this file */
272
273
	bResult = AllocateUserPhysicalPages(GetCurrentProcess(),
274
					    &NumberOfPages, *page_info);
275
	if (bResult != TRUE) {
276
		fprintf(stderr,
277
			"InnoDB: AWE: Cannot allocate physical pages,"
278
			" error %lu.\n",
279
			(ulint)GetLastError());
280
281
		return(FALSE);
282
	}
283
284
	if (NumberOfPagesInitial != NumberOfPages) {
285
		fprintf(stderr,
286
			"InnoDB: AWE: Error: allocated only %lu pages"
287
			" of %lu requested.\n"
288
			"InnoDB: Check that you have enough free RAM.\n"
289
			"InnoDB: In Windows XP Professional and"
290
			" 2000 Professional\n"
291
			"InnoDB: Windows PAE size is max 4 GB."
292
			" In 2000 and .NET\n"
293
			"InnoDB: Advanced Servers and 2000 Datacenter Server"
294
			" it is 32 GB,\n"
295
			"InnoDB: and in .NET Datacenter Server it is 64 GB.\n"
296
			"InnoDB: A Microsoft web page said that"
297
			" the processor must be an Intel\n"
298
			"InnoDB: processor.\n",
299
			(ulint)NumberOfPages,
300
			(ulint)NumberOfPagesInitial);
301
302
		return(FALSE);
303
	}
304
305
	fprintf(stderr,
306
		"InnoDB: Using Address Windowing Extensions (AWE);"
307
		" allocated %lu MB\n",
308
		n_megabytes);
309
310
	return(TRUE);
311
#else
312
	UT_NOT_USED(n_megabytes);
313
	UT_NOT_USED(page_info);
314
315
	return(FALSE);
316
#endif
317
}
318
319
/********************************************************************
320
Allocates a window in the virtual address space where we can map then
321
pages of physical memory. */
322
323
byte*
324
os_awe_allocate_virtual_mem_window(
325
/*===============================*/
326
			/* out, own: allocated memory, or NULL if did not
327
			succeed */
328
	ulint	size)	/* in: virtual memory allocation size in bytes, must
329
			be < 2 GB */
330
{
331
#ifdef UNIV_SIMULATE_AWE
332
	ulint	i;
333
334
	os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096);
335
	os_awe_simulate_window_size = size;
336
337
	os_awe_simulate_map = ut_malloc(sizeof(byte*) * (size / 4096));
338
339
	for (i = 0; i < (size / 4096); i++) {
340
		*(os_awe_simulate_map + i) = NULL;
341
	}
342
343
	return(os_awe_simulate_window);
344
345
#elif defined(__WIN2000__)
346
	byte*	ptr;
347
348
	if (size > (ulint)0x7FFFFFFFUL) {
349
		fprintf(stderr,
350
			"InnoDB: AWE: Cannot allocate %lu bytes"
351
			" of virtual memory\n", size);
352
353
		return(NULL);
354
	}
355
356
	ptr = VirtualAlloc(NULL, (SIZE_T)size, MEM_RESERVE | MEM_PHYSICAL,
357
			   PAGE_READWRITE);
358
	if (ptr == NULL) {
359
		fprintf(stderr,
360
			"InnoDB: AWE: Cannot allocate %lu bytes"
361
			" of virtual memory, error %lu\n",
362
			size, (ulint)GetLastError());
363
364
		return(NULL);
365
	}
366
367
	os_awe_window = ptr;
368
	os_awe_window_size = size;
369
370
	ut_total_allocated_memory += size;
371
372
	return(ptr);
373
#else
374
	UT_NOT_USED(size);
375
376
	return(NULL);
377
#endif
378
}
379
380
/********************************************************************
381
With this function you can map parts of physical memory allocated with
382
the ..._allocate_physical_mem to the virtual address space allocated with
383
the previous function. Intel implements this so that the process page
384
tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP
385
showed that this takes < 1 microsecond, much better than the estimated 80 us
386
for copying a 16 kB page memory to memory. But, the operation will at least
387
partially invalidate the translation lookaside buffer (TLB) of all
388
processors. Under a real-world load the performance hit may be bigger. */
389
390
ibool
391
os_awe_map_physical_mem_to_window(
392
/*==============================*/
393
					/* out: TRUE if success; the function
394
					calls exit(1) in case of an error */
395
	byte*		ptr,		/* in: a page-aligned pointer to
396
					somewhere in the virtual address
397
					space window; we map the physical mem
398
					pages here */
399
	ulint		n_mem_pages,	/* in: number of 4 kB mem pages to
400
					map */
401
	os_awe_t*	page_info)	/* in: array of page infos for those
402
					pages; each page has one slot in the
403
					array */
404
{
405
#ifdef UNIV_SIMULATE_AWE
406
	ulint	i;
407
	byte**	map;
408
	byte*	page;
409
	byte*	phys_page;
410
411
	ut_a(ptr >= os_awe_simulate_window);
412
	ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size);
413
	ut_a(page_info >= os_awe_simulate_page_info);
414
	ut_a(page_info < os_awe_simulate_page_info
415
	     + (os_awe_simulate_mem_size / 4096));
416
417
	/* First look if some other 'physical pages' are mapped at ptr,
418
	and copy them back to where they were if yes */
419
420
	map = os_awe_simulate_map
421
		+ ((ulint)(ptr - os_awe_simulate_window)) / 4096;
422
	page = ptr;
423
424
	for (i = 0; i < n_mem_pages; i++) {
425
		if (*map != NULL) {
426
			ut_memcpy(*map, page, 4096);
427
		}
428
		map++;
429
		page += 4096;
430
	}
431
432
	/* Then copy to ptr the 'physical pages' determined by page_info; we
433
	assume page_info is a segment of the array we created at the start */
434
435
	phys_page = os_awe_simulate_mem
436
		+ (ulint)(page_info - os_awe_simulate_page_info)
437
		* 4096;
438
439
	ut_memcpy(ptr, phys_page, n_mem_pages * 4096);
440
441
	/* Update the map */
442
443
	map = os_awe_simulate_map
444
		+ ((ulint)(ptr - os_awe_simulate_window)) / 4096;
445
446
	for (i = 0; i < n_mem_pages; i++) {
447
		*map = phys_page;
448
449
		map++;
450
		phys_page += 4096;
451
	}
452
453
	return(TRUE);
454
455
#elif defined(__WIN2000__)
456
	BOOL		bResult;
457
	os_awe_t	n_pages;
458
459
	n_pages = (os_awe_t)n_mem_pages;
460
461
	if (!(ptr >= os_awe_window)) {
462
		fprintf(stderr,
463
			"InnoDB: AWE: Error: trying to map to address %lx"
464
			" but AWE window start %lx\n",
465
			(ulint)ptr, (ulint)os_awe_window);
466
		ut_a(0);
467
	}
468
469
	if (!(ptr <= os_awe_window + os_awe_window_size - UNIV_PAGE_SIZE)) {
470
		fprintf(stderr,
471
			"InnoDB: AWE: Error: trying to map to address %lx"
472
			" but AWE window end %lx\n",
473
			(ulint)ptr, (ulint)os_awe_window + os_awe_window_size);
474
		ut_a(0);
475
	}
476
477
	if (!(page_info >= os_awe_page_info)) {
478
		fprintf(stderr,
479
			"InnoDB: AWE: Error: trying to map page info"
480
			" at %lx but array start %lx\n",
481
			(ulint)page_info, (ulint)os_awe_page_info);
482
		ut_a(0);
483
	}
484
485
	if (!(page_info <= os_awe_page_info + (os_awe_n_pages - 4))) {
486
		fprintf(stderr,
487
			"InnoDB: AWE: Error: trying to map page info"
488
			" at %lx but array end %lx\n",
489
			(ulint)page_info,
490
			(ulint)(os_awe_page_info + os_awe_n_pages));
491
		ut_a(0);
492
	}
493
494
	bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info);
495
496
	if (bResult != TRUE) {
497
		ut_print_timestamp(stderr);
498
		fprintf(stderr,
499
			"  InnoDB: AWE: Mapping of %lu physical pages"
500
			" to address %lx failed,\n"
501
			"InnoDB: error %lu.\n"
502
			"InnoDB: Cannot continue operation.\n",
503
			n_mem_pages, (ulint)ptr, (ulint)GetLastError());
504
		exit(1);
505
	}
506
507
	return(TRUE);
508
#else
509
	UT_NOT_USED(ptr);
510
	UT_NOT_USED(n_mem_pages);
511
	UT_NOT_USED(page_info);
512
513
	return(FALSE);
514
#endif
515
}
516
517
/********************************************************************
518
Converts the current process id to a number. It is not guaranteed that the
519
number is unique. In Linux returns the 'process number' of the current
520
thread. That number is the same as one sees in 'top', for example. In Linux
521
the thread id is not the same as one sees in 'top'. */
522
523
ulint
524
os_proc_get_number(void)
525
/*====================*/
526
{
527
#ifdef __WIN__
528
	return((ulint)GetCurrentProcessId());
529
#else
530
	return((ulint)getpid());
531
#endif
532
}
533
534
/********************************************************************
535
Allocates non-cacheable memory. */
536
537
void*
538
os_mem_alloc_nocache(
539
/*=================*/
540
			/* out: allocated memory */
541
	ulint	n)	/* in: number of bytes */
542
{
543
#ifdef __WIN__
544
	void*	ptr;
545
546
	ptr = VirtualAlloc(NULL, n, MEM_COMMIT,
547
			   PAGE_READWRITE | PAGE_NOCACHE);
548
	ut_a(ptr);
549
550
	return(ptr);
551
#else
552
	return(ut_malloc(n));
553
#endif
554
}
555
556
/********************************************************************
557
Allocates large pages memory. */
558
559
void*
560
os_mem_alloc_large(
561
/*===============*/
562
					/* out: allocated memory */
563
	ulint		n,		/* in: number of bytes */
564
	ibool		set_to_zero,	/* in: TRUE if allocated memory
565
					should be set to zero if
566
					UNIV_SET_MEM_TO_ZERO is defined */
567
	ibool		assert_on_error)/* in: if TRUE, we crash mysqld if
568
					 the memory cannot be allocated */
569
{
570
#ifdef HAVE_LARGE_PAGES
571
	ulint size;
572
	int shmid;
573
	void *ptr = NULL;
574
	struct shmid_ds buf;
575
576
	if (!os_use_large_pages || !os_large_page_size) {
577
		goto skip;
578
	}
579
580
#ifdef UNIV_LINUX
581
	/* Align block size to os_large_page_size */
582
	size = ((n - 1) & ~(os_large_page_size - 1)) + os_large_page_size;
583
584
	shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W);
585
	if (shmid < 0) {
586
		fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate"
587
			" %lu bytes. errno %d\n", n, errno);
588
	} else {
589
		ptr = shmat(shmid, NULL, 0);
590
		if (ptr == (void *)-1) {
591
			fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to"
592
				" attach shared memory segment, errno %d\n",
593
				errno);
594
		}
595
596
		/* Remove the shared memory segment so that it will be
597
		automatically freed after memory is detached or
598
		process exits */
599
		shmctl(shmid, IPC_RMID, &buf);
600
	}
601
#endif
602
603
	if (ptr) {
604
		if (set_to_zero) {
605
#ifdef UNIV_SET_MEM_TO_ZERO
606
			memset(ptr, '\0', size);
607
#endif
608
		}
609
610
		return(ptr);
611
	}
612
613
	fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional"
614
		" memory pool\n");
615
skip:
616
#endif /* HAVE_LARGE_PAGES */
617
618
	return(ut_malloc_low(n, set_to_zero, assert_on_error));
619
}
620
621
/********************************************************************
Frees large pages memory. When large pages are in use on Linux, the block
is first detached as a shared memory segment; if that fails, or large pages
are not in use, the block is released with ut_free(), which matches the
ut_malloc_low() fallback in os_mem_alloc_large(). */

void
os_mem_free_large(
/*==============*/
	void	*ptr)	/* in: memory block returned by
			os_mem_alloc_large() */
{
	/* Shared memory segments are only ever attached in the UNIV_LINUX
	build; in any other build the block came from ut_malloc_low() and
	must always reach ut_free() below */
#if defined(HAVE_LARGE_PAGES) && defined(UNIV_LINUX)
	if (os_use_large_pages && os_large_page_size && !shmdt(ptr)) {
		/* shmdt() returned 0: the block was a shared memory
		segment and is now detached */
		return;
	}
#endif

	ut_free(ptr);
}
641
642
/********************************************************************
643
Sets the priority boost for threads released from waiting within the current
644
process. */
645
646
void
647
os_process_set_priority_boost(
648
/*==========================*/
649
	ibool	do_boost)	/* in: TRUE if priority boost should be done,
650
				FALSE if not */
651
{
652
#ifdef __WIN__
653
	ibool	no_boost;
654
655
	if (do_boost) {
656
		no_boost = FALSE;
657
	} else {
658
		no_boost = TRUE;
659
	}
660
661
#if TRUE != 1
662
# error "TRUE != 1"
663
#endif
664
665
	/* Does not do anything currently!
666
	SetProcessPriorityBoost(GetCurrentProcess(), no_boost);
667
	*/
668
	fputs("Warning: process priority boost setting"
669
	      " currently not functional!\n",
670
	      stderr);
671
#else
672
	UT_NOT_USED(do_boost);
673
#endif
674
}