4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
37 #include <sys/debug.h>
38 #include <sys/socket.h>
40 #include <sys/types.h>
45 #include <sys/dmu_ctl.h>
46 #include <sys/dmu_ctl_impl.h>
/*
 * File-scope state for the control socket. The mutex member is statically
 * initialized here; the remaining initializers are not visible in this
 * excerpt.
 */
48 static dctl_sock_info_t ctl_sock = {
49 .dsi_mtx = PTHREAD_MUTEX_INITIALIZER,
/*
 * Forward declaration: dctl_thread() calls this to recreate the socket
 * before the function is defined further down.
 */
53 static int dctl_create_socket_common();
56 * Routines from zfs_ioctl.c
58 extern int zfs_ioctl_init();
59 extern int zfs_ioctl_fini();
60 extern int zfsdev_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr,
64 * We can't simply put the client file descriptor in wthr_info_t because we
65 * have no way of accessing it from the DMU code without extensive
68 * Therefore each worker thread will have its own global thread-specific
/*
 * Starts out as -1 in every thread. The dctls_* routines below VERIFY that
 * it is >= 0 before using it as the descriptor they talk to.
 * NOTE(review): dctl_thread() declares a local variable with this same
 * name; the statement that assigns this thread-local is not visible in
 * this excerpt -- confirm where it is set.
 */
71 static __thread int client_fd = -1;
/*
 * Fetch `size` bytes located at `src` on the peer side into `dest`.
 *
 * Sends a DCTL_COPYIN message carrying the `src` pointer value and `size`
 * over client_fd, then reads `size` bytes of payload into `dest`.
 * client_fd must be >= 0 (enforced with VERIFY).
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably a non-zero value is returned when the send or the read
 * fails -- confirm.
 */
73 int dctls_copyin(const void *src, void *dest, size_t size)
77 VERIFY(client_fd >= 0);
79 cmd.dcmd_msg = DCTL_COPYIN;
80 cmd.u.dcmd_copy.ptr = (uintptr_t) src;
81 cmd.u.dcmd_copy.size = size;
83 if (dctl_send_msg(client_fd, &cmd) != 0)
86 if (dctl_read_data(client_fd, dest, size) != 0)
/*
 * String variant of the copy-in request.
 *
 * Sends a DCTL_COPYINSTR message carrying the `from` pointer value and
 * `max` as the size, then reads a reply message, which is checked against
 * DCTL_GEN_REPLY. The reply's size field gives the number of bytes that
 * follow; that many bytes are read into `to`. The value returned at the
 * end is the reply's rc field.
 *
 * client_fd must be >= 0 (enforced with VERIFY).
 *
 * NOTE(review): how `len` is filled in, and what is returned on the
 * failure branches, is not visible in this excerpt.
 */
92 int dctls_copyinstr(const char *from, char *to, size_t max, size_t *len)
97 VERIFY(client_fd >= 0);
104 msg.dcmd_msg = DCTL_COPYINSTR;
105 msg.u.dcmd_copy.ptr = (uintptr_t) from;
106 msg.u.dcmd_copy.size = max;
108 if (dctl_send_msg(client_fd, &msg) != 0)
111 if (dctl_read_msg(client_fd, &msg) != 0)
114 if (msg.dcmd_msg != DCTL_GEN_REPLY)
117 copied = msg.u.dcmd_reply.size;
123 if (dctl_read_data(client_fd, to, copied) != 0)
131 return msg.u.dcmd_reply.rc;
/*
 * Push `size` bytes from the local buffer `src` to address `dest` on the
 * peer side.
 *
 * Sends a DCTL_COPYOUT message carrying the `dest` pointer value and
 * `size` over client_fd, followed by `size` bytes of payload taken from
 * `src`. client_fd must be >= 0 (enforced with VERIFY).
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably a non-zero value is returned when either send fails --
 * confirm.
 */
134 int dctls_copyout(const void *src, void *dest, size_t size)
138 VERIFY(client_fd >= 0);
140 cmd.dcmd_msg = DCTL_COPYOUT;
141 cmd.u.dcmd_copy.ptr = (uintptr_t) dest;
142 cmd.u.dcmd_copy.size = size;
144 if (dctl_send_msg(client_fd, &cmd) != 0)
147 if (dctl_send_data(client_fd, src, size) != 0)
/*
 * Ask the peer to read up to `len` bytes from its descriptor `fd` and
 * transfer them into `buf`.
 *
 * Sends a DCTL_FD_READ message (fd, len) over client_fd and reads the
 * reply, which is checked against DCTL_GEN_REPLY. A non-zero rc in the
 * reply is returned to the caller as-is. Otherwise the reply's size field
 * is the number of payload bytes that follow; they are read into `buf`
 * and *residp is set to `len` minus that count.
 *
 * client_fd must be >= 0 (enforced with VERIFY).
 *
 * NOTE(review): the handling of a wrong reply type and the final return
 * are not visible in this excerpt.
 */
153 int dctls_fd_read(int fd, void *buf, ssize_t len, ssize_t *residp)
159 VERIFY(client_fd >= 0);
161 msg.dcmd_msg = DCTL_FD_READ;
162 msg.u.dcmd_fd_io.fd = fd;
163 msg.u.dcmd_fd_io.size = len;
165 if ((error = dctl_send_msg(client_fd, &msg)) != 0)
168 if ((error = dctl_read_msg(client_fd, &msg)) != 0)
171 if (msg.dcmd_msg != DCTL_GEN_REPLY)
174 if (msg.u.dcmd_reply.rc != 0)
175 return msg.u.dcmd_reply.rc;
177 dsize = msg.u.dcmd_reply.size;
180 error = dctl_read_data(client_fd, buf, dsize);
182 *residp = len - dsize;
/*
 * Ask the peer to write `len` bytes from `src` to its descriptor `fd`.
 *
 * Sends a DCTL_FD_WRITE message (fd, len) over client_fd, then `len`
 * bytes of payload from `src`, then reads the reply, which is checked
 * against DCTL_GEN_REPLY. A non-zero rc in the reply is returned to the
 * caller as-is. Finally the reply's size field is compared with `len`,
 * so that a short write is detected here rather than by the caller.
 *
 * client_fd must be >= 0 (enforced with VERIFY).
 *
 * NOTE(review): the error checks after each send/read and the value
 * returned for a short write are not visible in this excerpt.
 */
187 int dctls_fd_write(int fd, const void *src, ssize_t len)
192 VERIFY(client_fd >= 0);
194 msg.dcmd_msg = DCTL_FD_WRITE;
195 msg.u.dcmd_fd_io.fd = fd;
196 msg.u.dcmd_fd_io.size = len;
198 error = dctl_send_msg(client_fd, &msg);
201 error = dctl_send_data(client_fd, src, len);
204 error = dctl_read_msg(client_fd, &msg);
209 if (msg.dcmd_msg != DCTL_GEN_REPLY)
212 if (msg.u.dcmd_reply.rc != 0)
213 return msg.u.dcmd_reply.rc;
216 * We have to do this because the original upstream code
217 * does not check if residp == len.
219 if (msg.u.dcmd_reply.size != len)
225 /* Handle a new connection */
/*
 * Serve one accepted connection on `sock_fd`.
 *
 * Messages are read from the socket in a loop for as long as
 * dctl_read_msg() succeeds. A message whose type is not DCTL_IOCTL is
 * reported on stderr. For an ioctl request, the command and argument
 * taken from the message are passed to zfsdev_ioctl(), and its result is
 * sent back in a DCTL_IOCTL_REPLY message that reuses the same message
 * structure.
 *
 * NOTE(review): what happens after the "unexpected message type"
 * diagnostic, after a failed reply send, and when the loop ends is not
 * visible in this excerpt.
 */
226 static void dctl_handle_conn(int sock_fd)
234 while (dctl_read_msg(sock_fd, &cmd) == 0) {
235 if (cmd.dcmd_msg != DCTL_IOCTL) {
236 fprintf(stderr, "%s(): unexpected message type.\n",
241 rc = zfsdev_ioctl(dev, cmd.u.dcmd_ioctl.cmd,
242 (intptr_t) cmd.u.dcmd_ioctl.arg, 0, NULL, NULL);
244 cmd.dcmd_msg = DCTL_IOCTL_REPLY;
245 cmd.u.dcmd_reply.rc = rc;
247 if (dctl_send_msg(sock_fd, &cmd) != 0)
255 /* Main worker thread loop */
/*
 * Worker-thread entry point; `arg` is this thread's wthr_info_t.
 *
 * The thread takes ctl_sock.dsi_mtx before entering its loop and holds it
 * while it polls (1000 ms timeout) and accepts on ctl_sock.dsi_fd. The
 * mutex is released only around dctl_handle_conn(), which is bracketed by
 * dctl_thr_rebalance(thr, B_FALSE) and dctl_thr_rebalance(thr, B_TRUE),
 * and is re-acquired afterwards. The loop runs until thr->wthr_exit is
 * set; the mutex is dropped on the way out.
 *
 * fds[0].fd is refreshed from ctl_sock.dsi_fd on every iteration because
 * the error path closes the descriptor and recreates the socket through
 * dctl_create_socket_common().
 *
 * NOTE(review): the local `client_fd` that receives the accept() result
 * has the same name as the file-scope __thread variable `client_fd` and
 * shadows it inside this function. The dctls_* routines VERIFY the
 * thread-local; the statement that assigns it is not visible in this
 * excerpt -- confirm it exists.
 */
256 static void *dctl_thread(void *arg)
258 wthr_info_t *thr = arg;
259 struct pollfd fds[1];
261 fds[0].events = POLLIN;
263 pthread_mutex_lock(&ctl_sock.dsi_mtx);
265 while (!thr->wthr_exit) {
266 /* Clean-up dead threads */
269 /* The file descriptor might change in the thread lifetime */
270 fds[0].fd = ctl_sock.dsi_fd;
272 /* Poll socket with 1-second timeout */
273 int rc = poll(fds, 1, 1000);
274 if (rc == 0 || (rc == -1 && errno == EINTR))
277 /* Recheck the exit flag */
282 /* Unknown error, let's try to recreate the socket */
283 close(ctl_sock.dsi_fd);
284 ctl_sock.dsi_fd = -1;
286 if (dctl_create_socket_common() != 0)
293 short rev = fds[0].revents;
296 ASSERT(rev == POLLIN);
299 * At this point there should be a connection ready to be
302 int client_fd = accept(ctl_sock.dsi_fd, NULL, NULL);
303 /* Many possible errors here, we'll just retry */
308 * Now lets handle the request. This can take a very
309 * long time (hours even), so we'll let other threads
310 * handle new connections.
312 pthread_mutex_unlock(&ctl_sock.dsi_mtx);
314 dctl_thr_rebalance(thr, B_FALSE);
315 dctl_handle_conn(client_fd);
316 dctl_thr_rebalance(thr, B_TRUE);
318 pthread_mutex_lock(&ctl_sock.dsi_mtx);
320 pthread_mutex_unlock(&ctl_sock.dsi_mtx);
/*
 * (Re)create the UNIX-domain stream socket described by ctl_sock.
 *
 * Expects ctl_sock.dsi_fd to be -1 on entry (ASSERT). Creates a
 * PF_UNIX/SOCK_STREAM socket, fills in dsi_addr with AF_UNIX and the path
 * from dsi_path, binds the socket to that address and puts it into
 * listening state with a backlog of LISTEN_BACKLOG.
 *
 * The path is copied with strncpy() limited to sizeof(sun_path) - 1 and
 * the last byte of sun_path is then set to '\0', so the result is always
 * NUL-terminated even if dsi_path were too long.
 *
 * NOTE(review): the unlink of a stale socket file, the error branches and
 * the return values are not visible in this excerpt.
 */
327 static int dctl_create_socket_common()
329 dctl_sock_info_t *s = &ctl_sock;
333 ASSERT(s->dsi_fd == -1);
336 * Unlink old socket, in case it exists.
337 * We don't care about errors here.
341 /* Create the socket */
342 s->dsi_fd = socket(PF_UNIX, SOCK_STREAM, 0);
343 if (s->dsi_fd == -1) {
349 s->dsi_addr.sun_family = AF_UNIX;
351 size = sizeof(s->dsi_addr.sun_path) - 1;
352 strncpy(s->dsi_addr.sun_path, s->dsi_path, size);
354 s->dsi_addr.sun_path[size] = '\0';
356 if (bind(s->dsi_fd, (struct sockaddr *) &s->dsi_addr,
357 sizeof(s->dsi_addr)) != 0) {
363 if (listen(s->dsi_fd, LISTEN_BACKLOG) != 0) {
/*
 * First-time creation of the control socket under `cfg_dir`.
 *
 * Expects ctl_sock to be unused on entry (dsi_path == NULL and
 * dsi_fd == -1, both ASSERTed). Builds the socket path as
 * cfg_dir "/" SOCKNAME in a freshly malloc'ed buffer stored in
 * ctl_sock.dsi_path, creates `cfg_dir` with mode 0770 (result
 * deliberately ignored), and then delegates to
 * dctl_create_socket_common().
 *
 * pathsize is strlen(cfg_dir) + strlen(SOCKNAME) + 2: the extra two bytes
 * are for the '/' separator and the terminating NUL. It is checked
 * against sizeof(sun_path) before allocating, so the strcpy()/strcat()
 * pair below cannot overrun the buffer.
 *
 * NOTE(review): the values returned on the size and allocation failures,
 * and the cleanup performed when the common routine fails, are not
 * visible in this excerpt.
 */
373 static int dctl_create_socket(const char *cfg_dir)
376 dctl_sock_info_t *s = &ctl_sock;
378 ASSERT(s->dsi_path == NULL);
379 ASSERT(s->dsi_fd == -1);
381 int pathsize = strlen(cfg_dir) + strlen(SOCKNAME) + 2;
382 if (pathsize > sizeof(s->dsi_addr.sun_path))
385 s->dsi_path = malloc(pathsize);
386 if (s->dsi_path == NULL)
389 strcpy(s->dsi_path, cfg_dir);
390 strcat(s->dsi_path, "/" SOCKNAME);
393 * For convenience, create the directory in case it doesn't exist.
394 * We don't care about errors here.
396 mkdir(cfg_dir, 0770);
398 error = dctl_create_socket_common();
403 if (s->dsi_fd != -1) {
/*
 * Tear down the control socket held in ctl_sock.
 *
 * Expects the socket to have been created beforehand: dsi_path must be
 * non-NULL and dsi_fd must not be -1 (both ASSERTed).
 *
 * NOTE(review): the actual teardown statements are not visible in this
 * excerpt.
 */
412 static void dctl_destroy_socket()
414 dctl_sock_info_t *s = &ctl_sock;
416 ASSERT(s->dsi_path != NULL);
417 ASSERT(s->dsi_fd != -1);
427 * Initialize the DMU userspace control interface.
428 * This should be called after kernel_init().
430 * Note that we only very rarely have more than a couple of simultaneous
431 * lzfs/lzpool connections. Since the thread pool grows automatically when all
432 * threads are busy, a good value for min_thr and max_free_thr is 2.
/*
 * Start-up sequence: zfs_ioctl_init(), then dctl_create_socket(cfg_dir),
 * then dctl_thr_pool_create(min_thr, max_free_thr, dctl_thread).
 *
 * Steps that already succeeded are undone when a later one fails:
 * zfs_ioctl_fini() is called if the socket cannot be created, and both
 * zfs_ioctl_fini() and dctl_destroy_socket() are called if the thread
 * pool cannot be created. The result of zfs_ioctl_fini() is deliberately
 * discarded on these paths.
 *
 * max_free_thr must be >= min_thr (ASSERT).
 *
 * NOTE(review): the conditions guarding each rollback and the return
 * statements are not visible in this excerpt; presumably the first
 * non-zero error is returned -- confirm.
 */
434 int dctl_server_init(const char *cfg_dir, int min_thr, int max_free_thr)
439 ASSERT(max_free_thr >= min_thr);
441 error = zfs_ioctl_init();
445 error = dctl_create_socket(cfg_dir);
447 (void) zfs_ioctl_fini();
451 error = dctl_thr_pool_create(min_thr, max_free_thr, dctl_thread);
453 (void) zfs_ioctl_fini();
454 dctl_destroy_socket();
462 * Terminate control interface.
463 * This should be called after closing all objsets, but before calling
465 * May return EBUSY if the SPA is busy.
467 * Thread pool destruction can take a while due to poll()
468 * timeout or due to a thread being busy (e.g. a backup is being taken).
/*
 * Shutdown sequence, in order: stop the worker thread pool, destroy the
 * control socket, then call zfs_ioctl_fini(). The value returned to the
 * caller is whatever zfs_ioctl_fini() returns.
 */
470 int dctl_server_fini()
472 dctl_thr_pool_stop();
473 dctl_destroy_socket();
475 return zfs_ioctl_fini();