From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 05/20] copy.c: add copy_dir_recursively()
Date: Wed, 3 Feb 2016 16:35:35 +0700 [thread overview]
Message-ID: <1454492150-10628-6-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1454492150-10628-1-git-send-email-pclouds@gmail.com>
This is busybox's copy_file() [1], modified to fit into Git. Because the
code comes from busybox, it is likely POSIX-y (or even Linux-y), so
Windows support may not be there yet.
[1] in libbb/copy_file.c from the GPL2+ commit
f2c043acfcf9dad9fd3d65821b81f89986bbe54e (busybox: fix
uninitialized memory when displaying IPv6 addresses - 2016-01-18)
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
cache.h | 1 +
copy.c | 371 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 372 insertions(+)
diff --git a/cache.h b/cache.h
index c75d13f..3fbb38d 100644
--- a/cache.h
+++ b/cache.h
@@ -1638,6 +1638,7 @@ extern void fprintf_or_die(FILE *, const char *fmt, ...);
extern int copy_fd(int ifd, int ofd);
extern int copy_file(const char *dst, const char *src, int mode);
extern int copy_file_with_time(const char *dst, const char *src, int mode);
+extern int copy_dir_recursively(const char *source, const char *dest);
extern void write_or_die(int fd, const void *buf, size_t count);
extern int write_or_whine(int fd, const void *buf, size_t count, const char *msg);
diff --git a/copy.c b/copy.c
index 574fa1f..c99d6e5 100644
--- a/copy.c
+++ b/copy.c
@@ -1,4 +1,6 @@
#include "cache.h"
+#include "dir.h"
+#include "hashmap.h"
int copy_fd(int ifd, int ofd)
{
@@ -65,3 +67,372 @@ int copy_file_with_time(const char *dst, const char *src, int mode)
return copy_times(dst, src);
return status;
}
+
+struct inode_key { /* identifies a file by (ino, dev, isdir); also used as the hashmap lookup key */
+ struct hashmap_entry entry; /* hashmap linkage; entry.hash is filled from hash_inode(ino) */
+ ino_t ino; /* st_ino of the file */
+ dev_t dev; /* st_dev of the file */
+ /*
+ * Reportedly, on cramfs a file and a dir can have same ino.
+ * Need to also remember "file/dir" bit:
+ */
+ char isdir; /* bool: set from !!S_ISDIR(st_mode) */
+};
+
+struct inode_value { /* what the inode map stores: a key plus an associated path */
+ struct inode_key key; /* first member; inode_cmp() compares it against a bare inode_key */
+ char name[FLEX_ARRAY]; /* NUL-terminated path copied in by add_to_ino_dev_hashtable() */
+};
+
+#define HASH_SIZE 311u /* Should be prime */
+/* Reduce an inode number to a hash value in the range [0, HASH_SIZE). */
+static inline unsigned hash_inode(ino_t i)
+{
+	unsigned reduced = i % HASH_SIZE;
+
+	return reduced;
+}
+
+/*
+ * hashmap comparison callback: returns 0 when the stored inode_value
+ * and the probe key agree on inode number, device and the "is a
+ * directory" bit, and 1 otherwise. keydata is not used.
+ */
+static int inode_cmp(const void *entry, const void *entry_or_key,
+		     const void *keydata)
+{
+	const struct inode_key *have = &((const struct inode_value *)entry)->key;
+	const struct inode_key *want = entry_or_key;
+
+	if (have->ino != want->ino)
+		return 1;
+	if (have->dev != want->dev)
+		return 1;
+	return have->isdir != want->isdir;
+}
+
+/*
+ * Look up the file described by st in map. Returns the name stored
+ * with the matching entry, or NULL when no entry matches.
+ */
+static const char *is_in_ino_dev_hashtable(const struct hashmap *map,
+					   const struct stat *st)
+{
+	struct inode_key probe;
+	const struct inode_value *found;
+
+	probe.ino = st->st_ino;
+	probe.dev = st->st_dev;
+	probe.isdir = !!S_ISDIR(st->st_mode);
+	probe.entry.hash = hash_inode(probe.ino);
+
+	found = hashmap_get(map, &probe, NULL);
+	if (!found)
+		return NULL;
+	return found->name;
+}
+
+/*
+ * Record the file described by st in map, together with a copy of
+ * path (including its terminating NUL). The entry is heap-allocated
+ * with xmalloc() and handed over to the map.
+ */
+static void add_to_ino_dev_hashtable(struct hashmap *map,
+				     const struct stat *st,
+				     const char *path)
+{
+	struct inode_value *v;
+	/* size_t, not int: strlen() returns size_t and feeds a size computation */
+	size_t len = strlen(path);
+
+	v = xmalloc(offsetof(struct inode_value, name) + len + 1);
+	v->key.entry.hash = hash_inode(st->st_ino);
+	v->key.ino = st->st_ino;
+	v->key.dev = st->st_dev;
+	v->key.isdir = !!S_ISDIR(st->st_mode);
+	memcpy(v->name, path, len + 1);
+	hashmap_add(map, v);
+}
+
+/*
+ * Return a pointer to the final character of s when that character
+ * equals c; return NULL otherwise. A NULL or empty s yields NULL, so
+ * the buffer is never read before its start.
+ */
+static inline char *last_char_is(const char *s, int c)
+{
+	const char *tail;
+
+	if (!s || !*s)
+		return NULL;
+	tail = s + (strlen(s) - 1);
+	return (unsigned char)*tail == c ? (char *)tail : NULL;
+}
+
+/*
+ * Join path and filename with exactly one '/' between them: a
+ * separator is inserted unless path already ends in '/', and leading
+ * slashes of filename are skipped. A NULL path is treated as "".
+ * Returns a newly allocated string (detached from a strbuf).
+ */
+static inline char *concat_path_file(const char *path, const char *filename)
+{
+	struct strbuf joined = STRBUF_INIT;
+	const char *sep;
+
+	if (!path)
+		path = "";
+	sep = last_char_is(path, '/') ? "" : "/";
+	for (; *filename == '/'; filename++)
+		; /* skip leading slashes */
+	strbuf_addf(&joined, "%s%s%s", path, sep, filename);
+	return strbuf_detach(&joined, NULL);
+}
+
+/*
+ * Like concat_path_file(), but returns NULL instead of a path when f
+ * is "." or "..".
+ */
+static char *concat_subpath_file(const char *path, const char *f)
+{
+	return (f && is_dot_or_dotdot(f)) ? NULL : concat_path_file(path, f);
+}
+
+/*
+ * Remove dest so that it can be created afresh. Returns 0 when
+ * unlink() succeeds. On failure, errno is restored to the value it had
+ * on entry (the caller's earlier failure, not the one from unlink)
+ * before the error is reported through sys_error().
+ */
+static int do_unlink(const char *dest)
+{
+	const int saved_errno = errno;
+
+	if (unlink(dest) >= 0)
+		return 0;
+
+	errno = saved_errno; /* do not use errno from unlink */
+	return sys_error(_("can't create '%s'"), dest);
+}
+
+/*
+ * Copy SOURCE to DEST, descending into directories.
+ *
+ * inode_map remembers (dev, ino, is-dir) of what has been handled so
+ * far: directories created at the destination are recorded so that
+ * copying a tree into itself is detected, and regular source files are
+ * recorded with their destination path so that further names of the
+ * same inode become hard links instead of separate copies.
+ *
+ * SOURCE is examined with lstat(), so a symlink is recreated as a
+ * symlink rather than followed.
+ *
+ * Returns 0 on success and -1 when SOURCE (or anything below it)
+ * could not be copied. Failing to carry over times, ownership or
+ * permissions only prints a message and does not affect the result.
+ *
+ * See busybox.git, libbb/copy_file.c for the original implementation.
+ */
+static int copy_dir_1(struct hashmap *inode_map,
+		      const char *source,
+		      const char *dest)
+{
+	/* This is a recursive function, try to minimize stack usage */
+	struct stat source_stat;
+	struct stat dest_stat;
+	int retval = 0;
+	int dest_exists = 0;
+	int ovr;
+
+	/* lstat(), not stat(): symlinks must show up as S_ISLNK below */
+	if (lstat(source, &source_stat) < 0)
+		return sys_error(_("can't stat '%s'"), source);
+
+	if (lstat(dest, &dest_stat) < 0) {
+		if (errno != ENOENT)
+			return sys_error(_("can't stat '%s'"), dest);
+	} else {
+		/* no system call failed here, so errno must not be reported */
+		if (source_stat.st_dev == dest_stat.st_dev &&
+		    source_stat.st_ino == dest_stat.st_ino)
+			return error(_("'%s' and '%s' are the same file"), source, dest);
+		dest_exists = 1;
+	}
+
+	if (S_ISDIR(source_stat.st_mode)) {
+		DIR *dp;
+		const char *tp;
+		struct dirent *d;
+		mode_t saved_umask = 0;
+
+		/* Did we create SOURCE ourselves earlier in this run? */
+		tp = is_in_ino_dev_hashtable(inode_map, &source_stat);
+		if (tp)
+			/* We did! it's a recursion! man the lifeboats... */
+			return error(_("recursion detected, omitting directory '%s'"),
+				     source);
+
+		if (dest_exists) {
+			/* not a system call failure either: plain error() */
+			if (!S_ISDIR(dest_stat.st_mode))
+				return error(_("target '%s' is not a directory"), dest);
+			/*
+			 * race here: user can substitute a symlink between
+			 * this check and actual creation of files inside dest
+			 */
+		} else {
+			/* Create DEST */
+			mode_t mode;
+			saved_umask = umask(0);
+
+			mode = source_stat.st_mode;
+			/* Allow owner to access new dir (at least for now) */
+			mode |= S_IRWXU;
+			if (mkdir(dest, mode) < 0) {
+				umask(saved_umask);
+				return sys_error(_("can't create directory '%s'"), dest);
+			}
+			umask(saved_umask);
+			/* need stat info for add_to_ino_dev_hashtable */
+			if (lstat(dest, &dest_stat) < 0)
+				return sys_error(_("can't stat '%s'"), dest);
+		}
+
+		/*
+		 * remember (dev,inode) of each created dir. name is
+		 * not remembered
+		 */
+		add_to_ino_dev_hashtable(inode_map, &dest_stat, "");
+
+		/* Recursively copy files in SOURCE */
+		dp = opendir(source);
+		if (!dp) {
+			/* report the failure instead of failing silently */
+			retval = sys_error(_("can't open directory '%s'"), source);
+			goto preserve_mode_ugid_time;
+		}
+
+		while ((d = readdir(dp))) {
+			char *new_source, *new_dest;
+
+			/* NULL means "." or "..": nothing to copy */
+			new_source = concat_subpath_file(source, d->d_name);
+			if (!new_source)
+				continue;
+			new_dest = concat_path_file(dest, d->d_name);
+			/* keep going after a failed entry, but remember it */
+			if (copy_dir_1(inode_map, new_source, new_dest) < 0)
+				retval = -1;
+			free(new_source);
+			free(new_dest);
+		}
+		closedir(dp);
+
+		if (!dest_exists &&
+		    chmod(dest, source_stat.st_mode & ~saved_umask) < 0) {
+			sys_error(_("can't preserve permissions of '%s'"), dest);
+			/* retval = -1; - WRONG! copy *WAS* made */
+		}
+		goto preserve_mode_ugid_time;
+	}
+
+	if (S_ISREG(source_stat.st_mode)) {
+		const char *link_target;
+		int src_fd;
+		int dst_fd;
+
+		/* Another name of an inode we already copied: hard-link it */
+		link_target = is_in_ino_dev_hashtable(inode_map, &source_stat);
+		if (link_target) {
+			if (link(link_target, dest) < 0) {
+				ovr = do_unlink(dest);
+				if (ovr < 0)
+					return ovr;
+				if (link(link_target, dest) < 0)
+					return sys_error(_("can't create link '%s'"), dest);
+			}
+			return 0;
+		}
+		add_to_ino_dev_hashtable(inode_map, &source_stat, dest);
+
+		src_fd = open(source, O_RDONLY);
+		if (src_fd < 0)
+			return sys_error(_("can't open '%s'"), source);
+
+		dst_fd = open(dest, O_WRONLY|O_CREAT|O_EXCL, source_stat.st_mode);
+		if (dst_fd == -1) {
+			ovr = do_unlink(dest);
+			if (ovr < 0) {
+				close(src_fd);
+				return ovr;
+			}
+			/* It shouldn't exist. If it exists, do not open (symlink attack?) */
+			dst_fd = open(dest, O_WRONLY|O_CREAT|O_EXCL, source_stat.st_mode);
+			if (dst_fd < 0) {
+				close(src_fd);
+				return sys_error(_("can't open '%s'"), dest);
+			}
+		}
+
+		switch (copy_fd(src_fd, dst_fd)) {
+		case COPY_READ_ERROR:
+			error(_("copy-fd: read returned %s"), strerror(errno));
+			retval = -1;
+			break;
+		case COPY_WRITE_ERROR:
+			error(_("copy-fd: write returned %s"), strerror(errno));
+			retval = -1;
+			break;
+		}
+
+		/* Careful with writing: close() may report a deferred error */
+		if (close(dst_fd) < 0)
+			retval = sys_error(_("error writing to '%s'"), dest);
+		/* ...but read errors were already reported by copy_fd() */
+		close(src_fd);
+		goto preserve_mode_ugid_time;
+	}
+
+	/* Source is a symlink or a special file */
+	/* We are lazy here, a bit lax with races... */
+	if (dest_exists) {
+		errno = EEXIST;
+		ovr = do_unlink(dest);
+		if (ovr < 0)
+			return ovr;
+	}
+	if (S_ISLNK(source_stat.st_mode)) {
+		struct strbuf lpath = STRBUF_INIT;
+
+		if (strbuf_readlink(&lpath, source, 0)) {
+			/* EINVAL => "file: Invalid argument" => puzzled user */
+			const char *errmsg = _("not a symlink");
+			int err = errno;
+
+			if (err != EINVAL)
+				errmsg = strerror(err);
+			error(_("%s: cannot read link: %s"), source, errmsg);
+			strbuf_release(&lpath);
+			/* the link was not copied, so this is a failure */
+			return -1;
+		}
+		if (symlink(lpath.buf, dest) < 0) {
+			/* report before releasing, while errno is still symlink()'s */
+			sys_error(_("can't create symlink '%s'"), dest);
+			strbuf_release(&lpath);
+			return -1;
+		}
+		strbuf_release(&lpath);
+		if (lchown(dest, source_stat.st_uid, source_stat.st_gid) < 0)
+			sys_error(_("can't preserve %s of '%s'"), "ownership", dest);
+		/*
+		 * _Not_ jumping to preserve_mode_ugid_time: symlinks
+		 * don't have those
+		 */
+		return 0;
+	}
+	if (S_ISBLK(source_stat.st_mode) ||
+	    S_ISCHR(source_stat.st_mode) ||
+	    S_ISSOCK(source_stat.st_mode) ||
+	    S_ISFIFO(source_stat.st_mode)) {
+		if (mknod(dest, source_stat.st_mode, source_stat.st_rdev) < 0)
+			return sys_error(_("can't create '%s'"), dest);
+	} else
+		/* unknown file type, not an errno condition */
+		return error(_("unrecognized file '%s' with mode %x"),
+			     source, (unsigned)source_stat.st_mode);
+
+preserve_mode_ugid_time:
+	{
+		struct timeval times[2];
+
+		/* both access and modification time are set to the source's mtime */
+		times[1].tv_sec = times[0].tv_sec = source_stat.st_mtime;
+		times[1].tv_usec = times[0].tv_usec = 0;
+		/* BTW, utimes sets usec-precision time - just FYI */
+		if (utimes(dest, times) < 0)
+			sys_error(_("can't preserve %s of '%s'"), "times", dest);
+		if (chown(dest, source_stat.st_uid, source_stat.st_gid) < 0) {
+			/* ownership was not carried over: drop setuid/setgid bits */
+			source_stat.st_mode &= ~(S_ISUID | S_ISGID);
+			sys_error(_("can't preserve %s of '%s'"), "ownership", dest);
+		}
+		if (chmod(dest, source_stat.st_mode) < 0)
+			sys_error(_("can't preserve %s of '%s'"), "permissions", dest);
+	}
+
+	return retval;
+}
+
+/*
+ * Copy the tree rooted at source to dest.
+ *
+ * Returns 0 when the copy was made and -1 on error.
+ *
+ * A failure to carry over mode, owner or times is reported with a
+ * message only, not through the return value. SELinux security
+ * contexts are not preserved. The copy is meant to keep symlinks and
+ * hardlinks as such; the details live in copy_dir_1().
+ */
+int copy_dir_recursively(const char *source, const char *dest)
+{
+	struct hashmap seen;
+	int status;
+
+	hashmap_init(&seen, inode_cmp, 1024);
+	status = copy_dir_1(&seen, source, dest);
+	/* second argument 1: also free the entries added during the copy */
+	hashmap_free(&seen, 1);
+
+	return status;
+}
--
2.7.0.377.g4cd97dd
next prev parent reply other threads:[~2016-02-03 9:36 UTC|newest]
Thread overview: 50+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-02-03 9:35 [PATCH 00/20] "git worktree move" preview Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 01/20] usage.c: move format processing out of die_errno() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 02/20] usage.c: add sys_error() that prints strerror() automatically Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 03/20] path.c: add git_common_path() and strbuf_git_common_path() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 04/20] path.c: add is_git_path_shared() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` Nguyễn Thái Ngọc Duy [this message]
2016-02-03 9:35 ` [PATCH 06/20] worktree.c: use is_dot_or_dotdot() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 07/20] worktree.c: store "id" instead of "git_dir" Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 08/20] worktree.c: add clear_worktree() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 09/20] worktree.c: add find_worktree_by_path() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 10/20] worktree.c: add is_main_worktree() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 11/20] worktree.c: recognize no main worktree Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 12/20] worktree.c: add update_worktree_location() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 13/20] worktree.c: add update_worktree_gitfile() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 14/20] worktree.c: add collect_per_worktree_git_paths() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 15/20] worktree: avoid 0{40}, too many zeroes, hard to read Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 16/20] worktree: simplify prefixing paths Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 17/20] worktree: add "move" commmand Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 18/20] worktree: refactor add_worktree() Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 19/20] worktree: move repo, simple case Nguyễn Thái Ngọc Duy
2016-02-03 9:35 ` [PATCH 20/20] worktree: move repo, convert main worktree Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 00/26] worktree lock, move, remove and unlock Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 01/26] usage.c: move format processing out of die_errno() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 02/26] usage.c: add sys_error() that prints strerror() automatically Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 03/26] copy.c: import copy_file() from busybox Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 04/26] copy.c: delete unused code in copy_file() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 05/26] copy.c: convert bb_(p)error_msg to (sys_)error Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 06/26] copy.c: style fix Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 07/26] copy.c: convert copy_file() to copy_dir_recursively() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 08/26] completion: support git-worktree Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 09/26] git-worktree.txt: keep subcommand listing in alphabetical order Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 10/26] wrapper.c: allow to create an empty file with write_file() Nguyễn Thái Ngọc Duy
2016-02-17 22:29 ` Junio C Hamano
2016-02-18 0:49 ` Duy Nguyen
2016-02-16 13:29 ` [PATCH v2 11/26] path.c: add git_common_path() and strbuf_git_common_path() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 12/26] worktree.c: use is_dot_or_dotdot() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 13/26] worktree.c: store "id" instead of "git_dir" Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 14/26] worktree.c: add clear_worktree() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 15/26] worktree.c: add find_worktree_by_path() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 16/26] worktree.c: add is_main_worktree() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 17/26] worktree.c: add validate_worktree() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 18/26] worktree.c: add update_worktree_location() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 19/26] worktree.c: add is_worktree_locked() Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 20/26] worktree: avoid 0{40}, too many zeroes, hard to read Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 21/26] worktree: simplify prefixing paths Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 22/26] worktree: add "lock" command Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 23/26] worktree: add "unlock" command Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 24/26] worktree: add "move" commmand Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 25/26] worktree move: accept destination as directory Nguyễn Thái Ngọc Duy
2016-02-16 13:29 ` [PATCH v2 26/26] worktree: add "remove" command Nguyễn Thái Ngọc Duy
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1454492150-10628-6-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).