2 a test implementation of an HSM daemon
4 Andrew Tridgell August 2008
15 .blocking_wait = true,
/* DMAPI session name used by this daemon; hsm_init() passes it to
   hsm_recover_session() together with &dmapi.sid */
26 #define SESSION_NAME "hacksmd"
28 /* no special handling on terminate in hacksmd, as we want existing
29 events to stay around so we can continue them on restart */
/* Signal handler: main() installs it for SIGTERM and SIGINT.
   signal: number of the signal that was delivered; it is only printed.
   NOTE(review): printf() is not on the POSIX list of async-signal-safe
   functions, so calling it from a handler is formally unsafe - confirm
   this is acceptable for a test daemon. */
30 static void hsm_term_handler(int signal)
32 printf("Got signal %d - exiting\n", signal);
38 initialise DMAPI, possibly recovering an existing session. The
39 hacksmd session is never destroyed, to allow for recovery of
40 partially completed events
/* Start-up sequence for the DMAPI side of the daemon:
     1. call dm_init_service() for as long as it keeps returning -1,
        printing the errno text when errno differs from errcode
     2. pass SESSION_NAME and &dmapi.sid to hsm_recover_session()
     3. set a disposition for DM_EVENT_MOUNT on the global handle
        (DM_GLOBAL_HANP / DM_GLOBAL_HLEN) for session dmapi.sid
   Takes no arguments and returns nothing. */
42 static void hsm_init(void)
44 char *dmapi_version = NULL;
45 dm_eventset_t eventSet;
/* retry loop: stays here until dm_init_service() stops failing */
49 while ((ret = dm_init_service(&dmapi_version)) == -1) {
/* NOTE(review): errcode looks like the errno that was reported last, so
   that an unchanged failure is not printed on every retry - the place
   where it is updated is not shown here, confirm */
50 if (errno != errcode) {
52 printf("Waiting for DMAPI to initialise (%d: %s)\n",
53 errno, strerror(errno));
58 printf("Initialised DMAPI version '%s'\n", dmapi_version);
/* fills in dmapi.sid, which every later DMAPI call in this file uses */
60 hsm_recover_session(SESSION_NAME, &dmapi.sid);
62 /* we want mount events only initially */
64 DMEV_SET(DM_EVENT_MOUNT, eventSet);
65 ret = dm_set_disp(dmapi.sid, DM_GLOBAL_HANP, DM_GLOBAL_HLEN, DM_NO_TOKEN,
66 &eventSet, DM_EVENT_MAX);
/* reported when the dm_set_disp() call above fails (presumably guarded
   by a check of ret) */
68 printf("Failed to setup events\n");
75 called on a DM_EVENT_MOUNT event. This just needs to acknowledge
76 the mount. We don't have any sort of 'setup' step before running
77 hacksmd on a filesystem, so it just accepts mount events from any
78 filesystem that supports DMAPI
/* Handle one mount event message.
   msg: the event message. Its ev_data is read as a dm_mount_event_t,
        and me_handle1 of that event supplies the handle
        (hand1 / hand1len) used by the calls below.
   Steps, all on session dmapi.sid with DM_NO_TOKEN:
     - dm_set_eventlist() with READ, WRITE, TRUNCATE and DESTROY set
     - dm_set_disp() with the same event set on the same handle
     - dm_respond_event() on msg->ev_token with DM_RESP_CONTINUE
   The printf lines report a failure of the call just before them
   (presumably guarded by a check of ret). Returns nothing. */
80 static void hsm_handle_mount(dm_eventmsg_t *msg)
82 dm_mount_event_t *mount;
85 dm_eventset_t eventSet;
/* pull the mount event and its first handle out of the message */
88 mount = DM_GET_VALUE(msg, ev_data, dm_mount_event_t*);
89 hand1 = DM_GET_VALUE(mount , me_handle1, void *);
90 hand1len = DM_GET_LEN(mount, me_handle1);
/* the events this daemon wants to see for objects under this handle */
93 DMEV_SET(DM_EVENT_READ, eventSet);
94 DMEV_SET(DM_EVENT_WRITE, eventSet);
95 DMEV_SET(DM_EVENT_TRUNCATE, eventSet);
96 DMEV_SET(DM_EVENT_DESTROY, eventSet);
97 ret = dm_set_eventlist(dmapi.sid, hand1, hand1len,
98 DM_NO_TOKEN, &eventSet, DM_EVENT_MAX);
100 printf("Failed to setup all event handler\n");
/* same event set again, this time as the disposition for our session */
104 ret = dm_set_disp(dmapi.sid, hand1, hand1len, DM_NO_TOKEN,
105 &eventSet, DM_EVENT_MAX);
107 printf("Failed to setup disposition for all events\n");
/* acknowledge the mount event itself */
111 ret = dm_respond_event(dmapi.sid, msg->ev_token,
112 DM_RESP_CONTINUE, 0, 0, NULL);
114 printf("Failed to respond to mount event\n");
120 called on a data event from DMAPI. Check the file's attribute, and if
121 it is migrated then do a recall
/* Handle one data event by recalling the file contents from the store.
   msg: the event message. Its ev_data is read as a dm_data_event_t and
        de_handle of that event gives the file handle (hanp / hlen).
        msg->ev_token is used as the token for every DMAPI call.
   Visible sequence:
     - dm_query_right(), then dm_request_right() for DM_RIGHT_EXCL
     - dm_get_dmattr() of HSM_ATTRNAME into h; ENOENT is reported (at
       debug level above 2) as a file that was already recalled
     - the attribute size and h.magic are validated
     - h.state is set to HSM_STATE_RECALL and written back
     - hsm_store_open(h.device, h.inode, O_RDONLY) opens the store copy
     - the data is copied into the file with dm_write_invis()
     - dm_remove_dmattr(), hsm_store_unlink(), and dm_set_region() with
       zero regions undo the migrated state
     - dm_respond_event() finishes the event with response and retcode
   response starts as DM_RESP_CONTINUE and is changed to DM_RESP_ABORT
   on the error paths; a failed hsm_store_unlink() only prints a
   WARNING. Returns nothing.
   NOTE(review): the bad attribute size message names hsm_handle_read,
   not this function. */
123 static void hsm_handle_recall(dm_eventmsg_t *msg)
129 dm_attrname_t attrname;
130 dm_token_t token = msg->ev_token;
132 dm_boolean_t exactFlag;
137 dm_response_t response = DM_RESP_CONTINUE;
140 ev = DM_GET_VALUE(msg, ev_data, dm_data_event_t *);
141 hanp = DM_GET_VALUE(ev, de_handle, void *);
142 hlen = DM_GET_LEN(ev, de_handle);
/* build the attribute name: zero filled, then HSM_ATTRNAME copied in */
144 memset(attrname.an_chars, 0, DM_ATTR_NAME_SIZE);
145 strncpy((char*)attrname.an_chars, HSM_ATTRNAME, DM_ATTR_NAME_SIZE);
147 /* make sure we have an exclusive right on the file */
148 ret = dm_query_right(dmapi.sid, hanp, hlen, token, &right);
149 if (ret != 0 && errno != ENOENT) {
150 printf("dm_query_right failed - %s\n", strerror(errno));
152 response = DM_RESP_ABORT;
/* NOTE(review): errno is tested below even when dm_query_right()
   returned 0, in which case it holds whatever an earlier call left
   behind. Also, right is compared before the ENOENT test, so it is
   read on the ENOENT path too, where the query may not have filled it
   in - confirm */
156 if (right != DM_RIGHT_EXCL || errno == ENOENT) {
157 ret = dm_request_right(dmapi.sid, hanp, hlen, token, DM_RR_WAIT, DM_RIGHT_EXCL);
159 printf("dm_request_right failed - %s\n", strerror(errno));
161 response = DM_RESP_ABORT;
166 /* get the attribute from the file, and make sure it is
168 ret = dm_get_dmattr(dmapi.sid, hanp, hlen, token, &attrname,
169 sizeof(h), &h, &rlen);
171 if (errno == ENOENT) {
172 if (options.debug > 2) {
173 printf("File already recalled (no attribute)\n");
177 printf("dm_get_dmattr failed - %s\n", strerror(errno));
179 response = DM_RESP_ABORT;
183 if (rlen != sizeof(h)) {
184 printf("hsm_handle_read - bad attribute size %d\n", (int)rlen);
186 response = DM_RESP_ABORT;
190 if (strncmp(h.magic, HSM_MAGIC, sizeof(h.magic)) != 0) {
191 printf("Bad magic '%*.*s'\n", (int)sizeof(h.magic), (int)sizeof(h.magic),
194 response = DM_RESP_ABORT;
198 /* mark the file as being recalled. This ensures that if
199 hacksmd dies part way through the recall that another
200 migrate won't happen until the recall is completed by a
202 h.state = HSM_STATE_RECALL;
203 ret = dm_set_dmattr(dmapi.sid, hanp, hlen, token, &attrname, 0, sizeof(h), (void*)&h);
205 printf("dm_set_dmattr failed - %s\n", strerror(errno));
207 response = DM_RESP_ABORT;
211 /* get the migrated data from the store, and put it in the
212 file with invisible writes */
213 fd = hsm_store_open(h.device, h.inode, O_RDONLY);
215 printf("Failed to open store file for file 0x%llx:0x%llx\n",
216 (unsigned long long)h.device, (unsigned long long)h.inode);
218 response = DM_RESP_ABORT;
222 if (options.debug > 1) {
223 printf("%s %s: Recalling file %llx:%llx of size %d\n",
225 dmapi_event_string(msg->ev_type),
226 (unsigned long long)h.device, (unsigned long long)h.inode,
/* copy loop: each chunk that read() returns from the store file is
   written into the file at offset ofs with dm_write_invis(); ret is
   the chunk length.
   NOTE(review): the loop ends when read() returns 0 or less, and no
   separate check for a read error is visible - confirm */
231 while ((ret = read(fd, buf, sizeof(buf))) > 0) {
232 int ret2 = dm_write_invis(dmapi.sid, hanp, hlen, token, DM_WRITE_SYNC, ofs, ret, buf);
234 printf("dm_write_invis failed - %s\n", strerror(errno));
236 response = DM_RESP_ABORT;
243 /* remove the attribute from the file - it is now fully recalled */
244 ret = dm_remove_dmattr(dmapi.sid, hanp, hlen, token, 0, &attrname);
246 printf("dm_remove_dmattr failed - %s\n", strerror(errno));
248 response = DM_RESP_ABORT;
252 /* remove the store file */
253 ret = hsm_store_unlink(h.device, h.inode);
255 printf("WARNING: Failed to unlink store file\n");
258 /* remove the managed region from the file */
259 ret = dm_set_region(dmapi.sid, hanp, hlen, token, 0, NULL, &exactFlag);
261 printf("failed dm_set_region - %s\n", strerror(errno));
263 response = DM_RESP_ABORT;
268 /* tell the kernel that the event has been handled */
269 ret = dm_respond_event(dmapi.sid, msg->ev_token,
270 response, retcode, 0, NULL);
272 printf("Failed to respond to read event\n");
279 called on a DM_EVENT_DESTROY event, when a file is being deleted
/* Handle one destroy event: drop the store copy and the HSM metadata
   of a file that is going away.
   msg: the event message. Its ev_data is read as a dm_destroy_event_t
        and ds_handle of that event gives the file handle (hanp / hlen).
   Visible sequence:
     - a special case when the token equals DM_INVALID_TOKEN
     - dm_query_right(), then dm_request_right() for DM_RIGHT_EXCL
     - dm_get_dmattr() of HSM_ATTRNAME into h, validated by size and
       by h.magic
     - hsm_store_unlink(h.device, h.inode)
     - dm_remove_dmattr() and dm_set_region() with zero regions
     - dm_respond_event() with response and retcode, only when the
       token is neither DM_NO_TOKEN nor DM_INVALID_TOKEN
   response starts as DM_RESP_CONTINUE and becomes DM_RESP_ABORT on
   several error paths. The messages prefixed with WARNING are not
   followed by such an assignment in the lines shown. Returns nothing. */
281 static void hsm_handle_destroy(dm_eventmsg_t *msg)
283 dm_destroy_event_t *ev;
287 dm_attrname_t attrname;
288 dm_token_t token = msg->ev_token;
291 dm_response_t response = DM_RESP_CONTINUE;
293 dm_boolean_t exactFlag;
295 ev = DM_GET_VALUE(msg, ev_data, dm_destroy_event_t *);
296 hanp = DM_GET_VALUE(ev, ds_handle, void *);
297 hlen = DM_GET_LEN(ev, ds_handle);
/* NOTE(review): what is done for an invalid token is not shown in this
   view of the function - confirm before relying on token below */
299 if (DM_TOKEN_EQ(token, DM_INVALID_TOKEN)) {
303 /* make sure we have an exclusive lock on the file */
304 ret = dm_query_right(dmapi.sid, hanp, hlen, token, &right);
305 if (ret != 0 && errno != ENOENT) {
306 printf("dm_query_right failed - %s\n", strerror(errno));
308 response = DM_RESP_ABORT;
/* NOTE(review): errno is tested below even when dm_query_right()
   returned 0, so it may be stale; right is compared before the ENOENT
   test, so it is read on the ENOENT path as well - confirm */
312 if (right != DM_RIGHT_EXCL || errno == ENOENT) {
313 ret = dm_request_right(dmapi.sid, hanp, hlen, token, DM_RR_WAIT, DM_RIGHT_EXCL);
315 printf("dm_request_right failed - %s\n", strerror(errno));
317 response = DM_RESP_ABORT;
/* build the attribute name: zero filled, then HSM_ATTRNAME copied in */
322 memset(attrname.an_chars, 0, DM_ATTR_NAME_SIZE);
323 strncpy((char*)attrname.an_chars, HSM_ATTRNAME, DM_ATTR_NAME_SIZE);
325 /* get the attribute and check it is valid. This is just
326 paranoia really, as the file is going away */
327 ret = dm_get_dmattr(dmapi.sid, hanp, hlen, token, &attrname,
328 sizeof(h), &h, &rlen);
330 printf("WARNING: dm_get_dmattr failed - %s\n", strerror(errno));
/* NOTE(review): the message below names hsm_handle_read although this
   is the destroy handler - looks copied from the recall path */
334 if (rlen != sizeof(h)) {
335 printf("hsm_handle_read - bad attribute size %d\n", (int)rlen);
337 response = DM_RESP_ABORT;
341 if (strncmp(h.magic, HSM_MAGIC, sizeof(h.magic)) != 0) {
342 printf("Bad magic '%*.*s'\n", (int)sizeof(h.magic), (int)sizeof(h.magic), h.magic);
344 response = DM_RESP_ABORT;
348 if (options.debug > 1) {
349 printf("%s: Destroying file %llx:%llx of size %d\n",
350 dmapi_event_string(msg->ev_type),
351 (unsigned long long)h.device, (unsigned long long)h.inode,
355 /* remove the store file */
356 ret = hsm_store_unlink(h.device, h.inode);
358 printf("WARNING: Failed to unlink store file for file 0x%llx:0x%llx\n",
359 (unsigned long long)h.device, (unsigned long long)h.inode);
362 /* remove the attribute */
363 ret = dm_remove_dmattr(dmapi.sid, hanp, hlen, token, 0, &attrname);
365 printf("dm_remove_dmattr failed - %s\n", strerror(errno));
367 response = DM_RESP_ABORT;
371 /* and clear the managed region */
372 ret = dm_set_region(dmapi.sid, hanp, hlen, token, 0, NULL, &exactFlag);
374 printf("WARNING: failed dm_set_region - %s\n", strerror(errno));
378 /* only respond if the token is real */
379 if (!DM_TOKEN_EQ(msg->ev_token,DM_NO_TOKEN) &&
380 !DM_TOKEN_EQ(msg->ev_token, DM_INVALID_TOKEN)) {
381 ret = dm_respond_event(dmapi.sid, msg->ev_token,
382 response, retcode, 0, NULL);
384 printf("Failed to respond to destroy event\n");
391 main switch for DMAPI messages
/* Route one event message to its handler, keyed on msg->ev_type.
   msg: the event message to dispatch.
   The handlers reached from here are hsm_handle_mount(),
   hsm_handle_recall() and hsm_handle_destroy(). The remaining path
   (presumably the default branch) answers the event with
   DM_RESP_CONTINUE, but only when msg->ev_token is neither DM_NO_TOKEN
   nor DM_INVALID_TOKEN. Returns nothing. */
393 static void hsm_handle_message(dm_eventmsg_t *msg)
395 switch (msg->ev_type) {
397 hsm_handle_mount(msg);
401 hsm_handle_recall(msg);
403 case DM_EVENT_DESTROY:
404 hsm_handle_destroy(msg);
/* fallback for event types without a dedicated handler */
407 if (!DM_TOKEN_EQ(msg->ev_token,DM_NO_TOKEN) &&
408 !DM_TOKEN_EQ(msg->ev_token, DM_INVALID_TOKEN)) {
409 printf("Giving default response\n");
410 int ret = dm_respond_event(dmapi.sid, msg->ev_token,
411 DM_RESP_CONTINUE, 0, 0, NULL);
/* NOTE(review): this path is not specific to mount events, yet the
   message below says mount event - looks copied from the mount handler */
413 printf("Failed to respond to mount event\n");
422 wait for DMAPI events to come in and dispatch them
/* Event loop of the daemon: fetch event messages for session dmapi.sid
   with dm_get_events() and pass each one to hsm_handle_message().
   options.blocking_wait selects between a DM_EV_WAIT call and a call
   with no flags. options.use_fork selects between handling a message
   in a forked child and handling it in this process.
   On a failed fetch, EAGAIN simply retries, ESTALE is reported as a
   DMAPI shutdown, and anything else is reported with its errno text.
   Takes no arguments. */
424 static void hsm_wait_events(void)
430 printf("Waiting for events\n");
434 if (options.blocking_wait) {
435 ret = dm_get_events(dmapi.sid, 0, DM_EV_WAIT, sizeof(buf), buf, &rlen);
437 /* optionally don't use DM_EV_WAIT to ensure
438 that the daemon can be killed. This is only
439 needed because GPFS uses an uninterruptible
440 sleep for dm_get_events with DM_EV_WAIT. It
441 should be an interruptible sleep */
443 ret = dm_get_events(dmapi.sid, 0, 0, sizeof(buf), buf, &rlen);
446 if (errno == EAGAIN) continue;
447 if (errno == ESTALE) {
448 printf("DMAPI service has shutdown - restarting\n");
452 printf("Failed to get event (%s)\n", strerror(errno));
456 /* loop over all the messages we received */
457 for (msg=(dm_eventmsg_t *)buf;
459 msg = DM_STEP_TO_NEXT(msg, dm_eventmsg_t *)) {
460 /* optionally fork on each message, thus
461 giving parallelism and allowing us to delay
462 recalls, simulating slow tape speeds */
463 if (options.use_fork) {
/* NOTE(review): fork() returns -1 on failure, which also satisfies the
   test below, so in that case the message is skipped here without
   being handled by anyone - confirm this is intended */
464 if (fork() != 0) continue;
465 hsm_handle_message(msg);
468 hsm_handle_message(msg);
475 on startup we look for partially completed events from an earlier
476 instance of hacksmd, and continue them if we can
/* Replay events that are still outstanding on session dmapi.sid.
   dm_getall_tokens() is asked for the outstanding tokens; when it
   fails with E2BIG the token array is grown with realloc(). For each
   token the matching message is fetched with dm_find_eventmsg(). A
   message whose ev_token differs from the token is answered with
   DM_RESP_ABORT and EINTR; messages are dispatched through
   hsm_handle_message(). Takes no arguments and returns nothing. */
478 static void hsm_cleanup_events(void)
482 dm_token_t *tok = NULL;
/* n is the capacity offered to the call, n2 the count it reports back */
488 ret = dm_getall_tokens(dmapi.sid, n, tok, &n2);
489 if (ret == -1 && errno == E2BIG) {
/* NOTE(review): the realloc() result overwrites tok directly and no
   NULL check is visible, so an allocation failure would lose the old
   buffer - confirm */
491 tok = realloc(tok, sizeof(dm_token_t)*n);
495 printf("dm_getall_tokens - %s\n", strerror(errno));
/* success with zero tokens: nothing is outstanding */
498 if (ret == 0 && n2 == 0) {
501 printf("Cleaning up %u tokens\n", n2);
504 /* get the message associated with this token
505 back from the kernel */
506 ret = dm_find_eventmsg(dmapi.sid, tok[i], sizeof(buf), buf, &rlen);
508 printf("Unable to find message for token in cleanup\n");
511 msg = (dm_eventmsg_t *)buf;
512 /* there seems to be a bug where GPFS
513 sometimes gives us a garbage token here */
514 if (!DM_TOKEN_EQ(tok[i], msg->ev_token)) {
515 printf("Message token mismatch in cleanup\n");
516 dm_respond_event(dmapi.sid, tok[i],
517 DM_RESP_ABORT, EINTR, 0, NULL);
/* hand the recovered message to the normal dispatcher */
519 hsm_handle_message(msg);
/* Print the command line help text to stdout.
   Lists the -c, -N, -d and -F options. Takes no arguments.
   NOTE(review): the getopt() string in main() also accepts h, which is
   not listed here - presumably it is the option that shows this text. */
529 static void usage(void)
531 printf("Usage: hacksmd <options>\n");
532 printf("\n\tOptions:\n");
533 printf("\t\t -c cleanup lost tokens\n");
534 printf("\t\t -N use a non-blocking event wait\n");
535 printf("\t\t -d level choose debug level\n");
536 printf("\t\t -F fork to handle each event\n");
/* Program entry point.
   argc, argv: the command line, parsed with getopt() using the option
   string chNd:F (d takes an argument).
   Visible effects of the options: options.debug is set from the -d
   argument, options.blocking_wait is cleared, options.use_fork is set.
   After parsing, SIGCHLD is ignored, hsm_term_handler is installed for
   SIGTERM and SIGINT, and the token and event cleanup helpers are
   called. */
541 int main(int argc, char * const argv[])
/* presumably set by -c and tested before the token cleanup below */
544 bool cleanup = false;
546 /* parse command-line options */
547 while ((opt = getopt(argc, argv, "chNd:F")) != -1) {
/* NOTE(review): strtoul() is given a NULL end pointer, so a -d argument
   that is not a number is not detected and yields 0 */
553 options.debug = strtoul(optarg, NULL, 0);
556 options.blocking_wait = false;
559 options.use_fork = true;
/* children forked per event are not waited for in hsm_wait_events();
   ignoring SIGCHLD presumably lets the system reap them */
573 signal(SIGCHLD, SIG_IGN);
575 signal(SIGTERM, hsm_term_handler);
576 signal(SIGINT, hsm_term_handler);
/* answer outstanding tokens with DM_RESP_ABORT and EINTR */
581 hsm_cleanup_tokens(dmapi.sid, DM_RESP_ABORT, EINTR);
/* replay events that an earlier instance left unfinished */
585 hsm_cleanup_events();