* [mdadm PATCH] bcache: add bcache superblock @ 2012-05-11 20:39 Dan Williams [not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org> 0 siblings, 1 reply; 6+ messages in thread From: Dan Williams @ 2012-05-11 20:39 UTC (permalink / raw) To: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA Cc: linux-raid-u79uwXL29TY76Z2rM5mHXA, linux-bcache-u79uwXL29TY76Z2rM5mHXA This is a hybrid proposal for supporting bcache as a md device. Somewhat similar to the v1.x metadata format, where array assembly is handled in userspace, but managed in the kernel. In the bcache case it is an "external" metadata format, but then the expectation is that the kernel "bcache" personality takes over runtime maintenance of the metadata. The container id for bcache is the "cache_set". The subvolume is the backing device identifier. This initial version only supports the runtime static portion of the superblock, it will need to grow the ability to read the journal to report the backing devices associated with a given cache set (i.e. in the superblock backing devices know their cache_set container, but cache devices need to look elsewhere to find their backing devices). 
Cc: Kent Overstreet <koverstreet-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> Signed-off-by: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> --- Assemble.c | 1 Makefile | 11 + bcache.h | 98 +++++++++ crc64.c | 129 +++++++++++ maps.c | 2 mdadm.h | 2 super-bcache.c | 634 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ util.c | 2 8 files changed, 873 insertions(+), 6 deletions(-) create mode 100644 bcache.h create mode 100644 crc64.c create mode 100644 super-bcache.c diff --git a/Assemble.c b/Assemble.c index fd94461..267a2ce 100644 --- a/Assemble.c +++ b/Assemble.c @@ -1594,6 +1594,7 @@ int assemble_container_content(struct supertype *st, int mdfd, } else switch(content->array.level) { case LEVEL_LINEAR: case LEVEL_MULTIPATH: + case LEVEL_BCACHE: case 0: err = sysfs_set_str(content, NULL, "array_state", "active"); diff --git a/Makefile b/Makefile index b8d363f..7886d13 100644 --- a/Makefile +++ b/Makefile @@ -103,8 +103,8 @@ OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o util.o maps.o lib.o \ Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \ Incremental.o \ mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \ - super-mbr.o super-gpt.o \ - restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o \ + super-mbr.o super-gpt.o super-bcache.o \ + restripe.o sysfs.o sha1.o mapfile.o crc32.o crc64.o sg_io.o msg.o \ platform-intel.o probe_roms.o CHECK_OBJS = restripe.o sysfs.o maps.o lib.o @@ -116,8 +116,8 @@ INCL = mdadm.h part.h bitmap.h MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \ config.o policy.o lib.o \ Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \ - super-mbr.o super-gpt.o \ - super-ddf.o sha1.o crc32.o msg.o bitmap.o \ + super-mbr.o super-gpt.o super-bcache.o \ + super-ddf.o sha1.o crc32.o crc64.o msg.o bitmap.o \ platform-intel.o probe_roms.o MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS)) @@ -128,7 +128,8 @@ STATICOBJS = pwgr.o ASSEMBLE_SRCS := mdassemble.c 
Assemble.c Manage.c config.c policy.c dlink.c util.c \ maps.c lib.c \ super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \ - platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c + platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c \ + super-bcache.c crc64.c ASSEMBLE_AUTO_SRCS := mdopen.c ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE ifdef MDASSEMBLE_AUTO diff --git a/bcache.h b/bcache.h new file mode 100644 index 0000000..765e369 --- /dev/null +++ b/bcache.h @@ -0,0 +1,98 @@ +#ifndef _BCACHE_H +#define _BCACHE_H + +#include <stdint.h> + +#define BITMASK(name, type, field, offset, size) \ +static inline uint64_t name(const type *k) \ +{ \ + uint64_t field = __le64_to_cpu(k->field); \ + return (field >> offset) & ~(((uint64_t) ~0) << size); \ +} \ + \ +static inline void SET_##name(type *k, uint64_t v) \ +{ \ + uint64_t field = __le64_to_cpu(k->field); \ + field &= ~(~((uint64_t) ~0 << size) << offset); \ + field |= v << offset; \ + k->field = __cpu_to_le64(field); \ +} + +static const char bcache_magic[] = { + 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, + 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; + +/* Version 1: Backing dev + * Version 2: Seed pointer into btree node checksum + * Version 3: Backing dev superblock has offset of start of data + */ + +#define BCACHE_SB_BDEV_VERSION 3 +#define BCACHE_SB_MAX_VERSION 3 + +#define SB_SECTOR 8 +#define SB_SIZE 16 /* default data_offset in bcache-tools (?) 
*/ +#define SB_LABEL_SIZE 32 + +struct cache_sb { + uint64_t csum; + uint64_t offset; /* sector where this sb was written */ + uint64_t version; +#define CACHE_BACKING_DEV 1 + + uint8_t magic[16]; + + uint8_t uuid[16]; + union { + uint8_t set_uuid[16]; + uint64_t set_magic; + }; + uint8_t label[SB_LABEL_SIZE]; + + uint64_t flags; + uint64_t seq; + uint64_t pad[8]; + + uint64_t nbuckets; /* device size */ + uint16_t block_size; /* sectors */ + uint16_t bucket_size; /* sectors */ + + uint16_t nr_in_set; + uint16_t nr_this_dev; + + uint32_t last_mount; /* time_t */ + + uint16_t first_bucket; + uint16_t keys; /* number of journal buckets */ + uint64_t d[]; /* journal buckets */ +}; + +static inline int SB_BDEV(struct cache_sb *c) +{ + return __le64_to_cpu(c->version) == CACHE_BACKING_DEV; +} + +BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); + +BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); +#define CACHE_MODE_WRITETHROUGH 0U +#define CACHE_MODE_WRITEBACK 1U +#define CACHE_MODE_WRITEAROUND 2U +#define CACHE_MODE_NONE 3U +BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); +#define BDEV_STATE_NONE 0U +#define BDEV_STATE_CLEAN 1U +#define BDEV_STATE_DIRTY 2U +#define BDEV_STATE_STALE 3U + +inline uint64_t crc64(const void *_data, size_t len); + +#define node(i, j) ((void *) ((i)->d + (j))) +#define end(i) node(i, (i)->keys) + +#define csum_set(i) \ + crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8)) + +#endif diff --git a/crc64.c b/crc64.c new file mode 100644 index 0000000..8f37445 --- /dev/null +++ b/crc64.c @@ -0,0 +1,129 @@ +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> + +/* + * Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group (Any + * use permitted, subject to terms of PostgreSQL license; see.) 
+ + * If we have a 64-bit integer type, then a 64-bit CRC looks just like the + * usual sort of implementation. (See Ross Williams' excellent introduction + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.) + * If we have no working 64-bit type, then fake it with two 32-bit registers. + * + * The present implementation is a normal (not "reflected", in Williams' + * terms) 64-bit CRC, using initial all-ones register contents and a final + * bit inversion. The chosen polynomial is borrowed from the DLT1 spec + * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM): + * + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + + * x^7 + x^4 + x + 1 +*/ + +static const uint64_t crc_table[256] = { + 0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL, + 0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL, + 0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL, + 0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL, + 0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL, + 0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL, + 0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL, + 0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL, + 0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL, + 0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL, + 0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL, + 0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL, + 0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL, + 0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL, + 0xDBFDFEA26E92BF46ULL, 
0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL, + 0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL, + 0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL, + 0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL, + 0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL, + 0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL, + 0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL, + 0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL, + 0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL, + 0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL, + 0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL, + 0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL, + 0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL, + 0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL, + 0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL, + 0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL, + 0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL, + 0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL, + 0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL, + 0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL, + 0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL, + 0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL, + 0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL, + 0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL, + 0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL, + 0x7B810CBBFCE67552ULL, 0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL, + 0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL, + 0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL, + 0x2EF6E20D1A5DF759ULL, 
0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL, + 0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL, + 0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL, + 0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL, + 0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL, + 0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL, + 0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL, + 0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL, + 0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL, + 0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL, + 0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL, + 0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL, + 0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL, + 0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL, + 0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL, + 0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL, + 0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL, + 0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL, + 0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL, + 0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL, + 0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL, + 0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL, + 0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL, + 0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL, + 0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 0xBE992B5F8BDB8C5CULL, + 0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL, + 0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL, + 0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL, + 0xCF235CFD546BBD2BULL, 
0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL, + 0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL, + 0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL, + 0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL, + 0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL, + 0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL, + 0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL, + 0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL, + 0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL, + 0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL, + 0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL, + 0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL, + 0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL, + 0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL, + 0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL, + 0x9AFCE626CE85B507ULL +}; + +inline uint64_t crc64(const void *_data, size_t len) +{ + uint64_t crc = 0xFFFFFFFFFFFFFFFFULL; + const unsigned char *data = _data; + + while (len--) { + int i = ((int) (crc >> 56) ^ *data++) & 0xFF; + crc = crc_table[i] ^ (crc << 8); + } + + return crc ^ 0xFFFFFFFFFFFFFFFFULL; +} diff --git a/maps.c b/maps.c index f2ba9a7..cedf548 100644 --- a/maps.c +++ b/maps.c @@ -94,6 +94,8 @@ mapping_t pers[] = { { "10", 10}, { "faulty", LEVEL_FAULTY}, { "container", LEVEL_CONTAINER}, + { "bcache", LEVEL_BCACHE}, + { "11", LEVEL_BCACHE}, { NULL, 0} }; diff --git a/mdadm.h b/mdadm.h index 3bcd052..a0ccff6 100644 --- a/mdadm.h +++ b/mdadm.h @@ -816,6 +816,7 @@ extern struct superswitch { extern struct superswitch super0, super1; extern struct superswitch super_imsm, super_ddf; extern struct superswitch mbr, gpt; +extern struct superswitch super_bcache; struct metadata_update { int len; @@ -1296,6 +1297,7 @@ static inline int 
xasprintf(char **strp, const char *fmt, ...) { #define LEVEL_MULTIPATH (-4) #define LEVEL_LINEAR (-1) #define LEVEL_FAULTY (-5) +#define LEVEL_BCACHE (0xb) /* kernel module doesn't know about these */ #define LEVEL_CONTAINER (-100) diff --git a/super-bcache.c b/super-bcache.c new file mode 100644 index 0000000..ec8f3db --- /dev/null +++ b/super-bcache.c @@ -0,0 +1,634 @@ +/* + * mdadm - bcache support + * + * Copyright (C) 2012 Intel Corporation + * + * bcache definitions copied from bcache-tools: + * git://evilpiepirate.org/~kent/bcache-tools.git + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#define HAVE_STDINT_H 1 +#include "mdadm.h" +#include "bcache.h" + +struct bcache_super { + union { + struct cache_sb *sb; + void *buf; + }; + struct dl { + int major, minor; + char *devname; + int fd; + } *disk; + int vol; + struct bcache_super *next; +}; + +enum { + /* FIXME this is a function of the bucket size */ + BCACHE_MAX_DEVICES = 2, +}; + +static int load_cache_sb(struct bcache_super *super, int keep_fd) +{ + struct dl *d = super->disk; + int rc, fd = d->fd; + struct cache_sb *c; + struct stat s; + + if (!keep_fd) + d->fd = -1; + + rc = fstat(fd, &s); + if (rc) + return rc; + d->major = major(s.st_rdev); + d->minor = minor(s.st_rdev); + + rc = posix_memalign(&super->buf, 4096, 4096); + if (rc) + return rc; + c = super->sb; + + if (pread(fd, c, 4096, SB_SECTOR << 9) != 4096) + return errno; + + if (csum_set(c) != __le64_to_cpu(c->csum)) + return ENODEV; + + if (memcmp(c->magic, bcache_magic, sizeof(bcache_magic)) != 0) + return ENODEV; + + return 0; +} + +static void __free_bcache(struct bcache_super *super) +{ + if (!super) + return; + + while (super) { + struct bcache_super *next = super->next; + struct dl *d = super->disk; + + d = super->disk; + if (d->fd >= 0) + close(d->fd); + free(d->devname); + free(d); + free(super->sb); + free(super); + super = next; + } +} + +static void free_bcache(struct supertype *st) +{ + struct bcache_super *super = st->sb; + + __free_bcache(super); + st->sb = NULL; +} + +#ifndef MDASSEMBLE +static void examine_bcache(struct supertype *st, char *homehost) +{ + const char *const cache_policies[] = { "lru", "fifo", "random", "" }; + const char *const bdev_states[] = { "none", "clean", "dirty", "stale" }; + const char *const bdev_modes[16] = { "writethrough", "writeback", "writearound", "none" }; + struct bcache_super *super = st->sb; + uint16_t first_bucket, bucket_size; + struct cache_sb *c = super->sb; + uint64_t nbuckets, csum; + unsigned long long sz; + char nbuf[64]; + + printf(" Magic : %s\n", + 
memcmp(bcache_magic, c->magic, 16) ? "<unknown>" : "<bcache>"); + printf(" Version : %d\n", (int) c->version); + printf(" Role : %s\n", SB_BDEV(c) ? "backing-device" : "cache"); + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); + printf(" Set UUID : %s\n", nbuf + 5); + __fname_from_uuid((int *) c->uuid, 0, nbuf, ':'); + printf(" Cache Devs : %u\n", c->nr_in_set); + /* FIXME: list all cache dev uuids in the load_container case */ + printf(" Device UUID : %s\n", nbuf + 5); + printf(" Flags :%s%s\n", CACHE_DISCARD(c) ? " discard" : "", + CACHE_SYNC(c) ? " sync" : ""); + if (SB_BDEV(c)) { + printf(" State : %s\n", bdev_states[BDEV_STATE(c)]); + printf(" Mode : %s\n", bdev_modes[BDEV_CACHE_MODE(c)]); + } else { + printf(" Policy : %s\n", cache_policies[CACHE_REPLACEMENT(c)]); + /* FIXME: add reporting of backing device uuids in the cache caase */ + } + printf(" Label : %.32s\n", c->label); + csum = __le64_to_cpu(c->csum); + nbuckets = __le64_to_cpu(c->nbuckets); + bucket_size = __le16_to_cpu(c->bucket_size); + first_bucket = __le16_to_cpu(c->first_bucket); + sz = (nbuckets - first_bucket) * bucket_size; + printf(" Device Size : %llu%s\n", sz, human_size(sz * 512)); + printf(" Bucket Size : %u\n", bucket_size); + printf(" Num Buckets : %llu\n", (unsigned long long) nbuckets); + printf(" this dev : %u\n", __le16_to_cpu(c->nr_this_dev)); + printf("First Bucket : %u\n", first_bucket); + printf(" Checksum : %llx %s\n", (unsigned long long) csum, + csum == csum_set(c) ? 
"correct" : "incorrect"); +} + +static void brief_examine_bcache(struct supertype *st, int verbose) +{ + struct bcache_super *super = st->sb; + struct cache_sb *c = super->sb; + char nbuf[64]; + + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); + printf("ARRAY metadata=bcache UUID=%s\n", nbuf + 5); +} + +static void brief_examine_subarrays_bcache(struct supertype *st, int verbose) +{ + struct bcache_super *super = st->sb; + struct cache_sb *c = super->sb; + char nbuf[64], nbuf1[64]; + + /* FIXME this needs to parse the cache device journal to find + * and report the backing dev uuid list + */ + if (!SB_BDEV(c)) + return; + + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); + __fname_from_uuid((int *) c->uuid, 0, nbuf1, ':'); + + printf("ARRAY container=%s UUID=%s\n", nbuf + 5, nbuf1 + 5); +} + +static void export_examine_bcache(struct supertype *st) +{ + struct bcache_super *super = st->sb; + struct cache_sb *c = super->sb; + char nbuf[64]; + + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); + printf("MD_METADATA=bcache\n"); + printf("MD_LEVEL=container\n"); + printf("MD_UUID=%s\n", nbuf+5); + printf("MD_DEVICES=%d\n", __le16_to_cpu(c->nr_in_set) + 1); +} + +static void detail_bcache(struct supertype *st, char *homehost) +{ + struct bcache_super *super = st->sb; + struct cache_sb *c = super->sb; + char nbuf[64]; + + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); + printf("\n UUID : %s\n", nbuf + 5); +} + +static void brief_detail_bcache(struct supertype *st) +{ + struct bcache_super *super = st->sb; + struct cache_sb *c = super->sb; + char nbuf[64]; + + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); + printf(" UUID=%s", nbuf + 5); +} + +static struct bcache_super *alloc_super(const char *func) +{ + struct bcache_super *super = calloc(1, sizeof(*super)); + struct dl *d = calloc(1, sizeof(*d)); + + if (!super || !d) { + fprintf(stderr, Name "%s: %s failed\n", func, __func__); + free(super); + free(d); + return NULL; + } + + super->vol = 
-1; + super->disk = d; + + return super; +} + +static int load_container_bcache(struct supertype *st, int fd, char *devname) +{ + struct bcache_super *list = NULL; + int rc, i, cdev = 0, bdev = 0; + int devnum = fd2devnum(fd); + struct mdinfo *sra, *sd; + + sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); + if (!sra) + return 1; + + if (sra->array.major_version != -1 || + sra->array.minor_version != -2 || + strcmp(sra->text_version, "bcache") != 0) { + rc = 1; + goto error; + } + + for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) { + struct bcache_super *super = alloc_super(__func__); + struct cache_sb *c; + char nm[32]; + int fd; + + rc = 1; + if (!super) + goto error; + super->next = list; + list = super; + + rc = 2; + sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); + fd = dev_open(nm, O_RDWR); + if (fd < 0) + goto error; + + super->disk->fd = fd; + rc = load_cache_sb(super, 1); + if (rc) + goto error; + c = super->sb; + if (SB_BDEV(c)) + bdev++; + else + cdev++; + } + rc = 0; + + /* FIXME disambiguate multiple bdevs per set, support multiple + * cache devices + */ + if (bdev > 1) { + fprintf(stderr, Name ": %d backing devices detected\n", bdev); + rc = 3; + } + if (cdev > 1) { + fprintf(stderr, Name ": %d cache devices detected\n", cdev); + rc = 3; + } + if (rc) + goto error; + st->sb = list; + list = NULL; + +error: + if (list) + __free_bcache(list); + sysfs_free(sra); + + st->container_dev = devnum; + if (rc == 0 && st->ss == NULL) { + st->ss = &super_bcache; + st->minor_version = 0; + st->max_devs = BCACHE_MAX_DEVICES; + } + return rc; +} +#endif + +static int load_bcache(struct supertype *st, int fd, char *devname) +{ + struct bcache_super *super; + struct dl *d; + int rc; + + free_bcache(st); + + super = alloc_super(__func__); + if (!super) + return 1; + + st->sb = super; + d = super->disk; + d->devname = devname ? 
strdup(devname) : NULL; + d->fd = fd; + rc = load_cache_sb(super, 0); + if (rc) { + free_bcache(st); + if (!devname) + return rc; + fprintf(stderr, Name ": %s failed on %s (%s)\n", __func__, + devname, strerror(rc)); + return rc; + } + + if (st->ss == NULL) { + st->ss = &super_bcache; + st->minor_version = 0; + st->max_devs = BCACHE_MAX_DEVICES; + } + + return 0; +} + +static int store_bcache(struct supertype *st, int fd) +{ + struct bcache_super *super = st->sb; + struct cache_sb *c = super->sb; + + if (!c) + return 1; + + if (pwrite(fd, c, sizeof(*c), SB_SECTOR << 9) != sizeof(*c)) + return 1; + + return 0; +} + +static int compare_bcache(struct supertype *st, struct supertype *tst) +{ + struct bcache_super *a = st->sb; + struct bcache_super *b = tst->sb; + + if (!st->sb) { + st->sb = tst->sb; + tst->sb = NULL; + return 0; + } + + if (memcmp(a->sb->set_uuid, b->sb->set_uuid, sizeof(b->sb->set_uuid)) != 0) + return 2; + + return 0; +} + +static __u64 avail_size_bcache(struct supertype *st, __u64 devsize) +{ + /* 4k from start, 8k min data offset */ + const uint32_t reserved_sectors = (4+8) * 2; + + if (devsize < reserved_sectors) + return 0; + + return devsize - reserved_sectors; +} + +static struct supertype *match_metadata_desc_bcache(char *arg) +{ + struct supertype *st; + + if (strcmp(arg, "bcache") != 0 && + strcmp(arg, "default") != 0) + return NULL; + + st = calloc(1, sizeof(*st)); + if (!st) + return NULL; + st->container_dev = NoMdDev; + st->ss = &super_bcache; + st->max_devs = BCACHE_MAX_DEVICES; + st->minor_version = 0; + st->sb = NULL; + + return st; +} + +static int match_home_bcache(struct supertype *st, char *homehost) +{ + /* the bcache superblock does not specify any host + * identification information. maybe it should... 
+ */ + + return -1; +} + +static void uuid_from_bcache(struct supertype *st, int uuid[4]) +{ + struct bcache_super *super = st->sb; + struct cache_sb *c = super->sb; + + memcpy(uuid, c->set_uuid, sizeof(c->set_uuid)); +} + +static void getinfo_bcache_volume(struct supertype *st, struct mdinfo *info, int map_disks, char *dmap) +{ + char *name = devnum2devname(st->container_dev); + struct bcache_super *super = st->sb; + uint16_t bucket_size, first_bucket; + struct cache_sb *c = super->sb; + unsigned long long sz; + uint64_t nbuckets; + + nbuckets = __le64_to_cpu(c->nbuckets); + bucket_size = __le16_to_cpu(c->bucket_size); + first_bucket = __le16_to_cpu(c->first_bucket); + sz = (nbuckets - first_bucket) * bucket_size; + + info->container_member = super->vol; + info->custom_array_size = sz; + info->component_size = sz; + info->recovery_start = MaxSector; + info->data_offset = SB_SECTOR + SB_SIZE; + sprintf(info->text_version, "/%s/%d", name, super->vol); + snprintf(info->name, sizeof(info->name), "%s", c->label); + memcpy(info->uuid, c->uuid, sizeof(c->uuid)); + + info->array.raid_disks = __le16_to_cpu(c->nr_in_set) + 1; + info->array.level = LEVEL_BCACHE; + info->array.layout = 0; + info->array.md_minor = -1; + info->array.ctime = 0; + info->array.utime = 0; + info->array.chunk_size = bucket_size * 512; + info->array.major_version = -1; + info->array.minor_version = -2; + + info->disk.major = 0; + info->disk.minor = 0; + info->disk.raid_disk = SB_BDEV(c); + info->disk.number = SB_BDEV(c); + info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC; +} + +static void getinfo_bcache(struct supertype *st, struct mdinfo *info, char *dmap) +{ + int i, cset, bdev, map_disks = info->array.raid_disks; + struct bcache_super *super = st->sb; + struct cache_sb *c = super->sb; + + memset(info, 0, sizeof(*info)); + + if (super->vol >= 0) + return getinfo_bcache_volume(st, info, map_disks, dmap); + + /* make Assemble choose the cache target */ + info->events = SB_BDEV(c); + 
info->recovery_start = MaxSector; + info->data_offset = SB_SECTOR; + info->component_size = SB_SIZE; + strcpy(info->text_version, "bcache"); + memcpy(info->uuid, c->set_uuid, sizeof(c->set_uuid)); + + info->array.raid_disks = __le16_to_cpu(c->nr_in_set) + 1; + info->array.level = LEVEL_CONTAINER; + info->array.layout = 0; + info->array.md_minor = -1; + info->array.ctime = 0; + info->array.utime = 0; + info->array.chunk_size = __le16_to_cpu(c->bucket_size) * 512; + info->array.major_version = -1; + info->array.minor_version = -2; + + info->disk.major = 0; + info->disk.minor = 0; + info->disk.raid_disk = SB_BDEV(c); + info->disk.number = SB_BDEV(c); + /* FIXME: need bcache superblock to identify failed devices */ + info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC; + + /* FIXME need to parse the journal uuid_bucket to understand + * which cache devs are consistent with the set + */ + for (i = 0; dmap && i < map_disks; i++) + dmap[i] = 1; + + cset = 0; + bdev = 0; + while (super) { + c = super->sb; + + /* FIXME filter out-of-sync devices */ + if (SB_BDEV(c)) + bdev++; + else + cset++; + super = super->next; + } + + if (cset + bdev == __le16_to_cpu(c->nr_in_set) + 1) + info->container_enough = 1; + else + info->container_enough = -1; +} + +static int update_bcache(struct supertype *st, struct mdinfo *i, char *update, + char *devname, int verbose, int uuid_set, char *homehost) +{ + /* FIXME */ + if (strcmp(update, "grow") == 0) { + return 0; + } else if (strcmp(update, "resync") == 0) { + return 0; + } else if (strcmp(update, "homehost") == 0) { + return -1; + } else if (strcmp(update, "name") == 0) { + return -1; + } else if (strcmp(update, "_reshape_progress") == 0) { + return 0; + } else if (strcmp(update, "assemble") == 0 ) { + return 0; + } else { + return -1; + } +} + +static struct mdinfo *container_content_bcache(struct supertype *st, char *subarray) +{ + struct bcache_super *super = st->sb; + struct mdinfo *info, *disk = NULL; + char *ep; + + info = 
calloc(1, sizeof(*info)); + if (!info) { + fprintf(stderr, Name ": failed to allocate %zu bytes\n", + sizeof(*info)); + return NULL; + } + + /* don't support multiple backing disks per cache set */ + if (subarray && (strtoul(subarray, &ep, 10) > 0 || *ep != '\0')) + goto error; + + super->vol = 0; + getinfo_bcache(st, info, NULL); + + for (; super; super = super->next) { + struct dl *d = super->disk; + struct cache_sb *c = super->sb; + + disk = calloc(1, sizeof(*disk)); + if (!disk) { + fprintf(stderr, Name ": failed to allocate disk\n"); + goto error; + } + disk->next = info->devs; + info->devs = disk; + + disk->disk.number = SB_BDEV(c); + disk->disk.raid_disk = SB_BDEV(c); + disk->disk.major = d->major; + disk->disk.minor = d->minor; + disk->recovery_start = MaxSector; + disk->disk.state = 1 << MD_DISK_ACTIVE; + disk->data_offset = info->data_offset; + disk->component_size = info->component_size; + + info->array.working_disks++; + } + + return info; + + error: + disk = info->devs; + while (disk) { + struct mdinfo *next = disk->next; + + free(disk); + disk = next; + } + + free(info); + return NULL; +} + + +struct superswitch super_bcache = { +#ifndef MDASSEMBLE + .examine_super = examine_bcache, + .brief_examine_super = brief_examine_bcache, + .brief_examine_subarrays = brief_examine_subarrays_bcache, + .export_examine_super = export_examine_bcache, + .detail_super = detail_bcache, + .brief_detail_super = brief_detail_bcache, + .load_container = load_container_bcache, +#endif + .match_home = match_home_bcache, + .uuid_from_super = uuid_from_bcache, + .getinfo_super = getinfo_bcache, + .update_super = update_bcache, + + .avail_size = avail_size_bcache, + + .compare_super = compare_bcache, + + .load_super = load_bcache, + .store_super = store_bcache, + .free_super = free_bcache, + .match_metadata_desc = match_metadata_desc_bcache, + .container_content = container_content_bcache, + + .external = 1, + .name = "bcache", +}; diff --git a/util.c b/util.c index 
6985a70..d9e49cf 100644 --- a/util.c +++ b/util.c @@ -919,7 +919,7 @@ struct superswitch *superlist[] = { &super0, &super1, &super_ddf, &super_imsm, - &mbr, &gpt, + &mbr, &gpt, &super_bcache, NULL }; #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) ^ permalink raw reply related [flat|nested] 6+ messages in thread
[parent not found: <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org>]
* Re: [mdadm PATCH] bcache: add bcache superblock [not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org> @ 2012-05-12 7:38 ` Jack Wang 2012-05-15 0:04 ` Mark Hills 1 sibling, 0 replies; 6+ messages in thread From: Jack Wang @ 2012-05-12 7:38 UTC (permalink / raw) To: Dan Williams Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA, linux-raid-u79uwXL29TY76Z2rM5mHXA, linux-bcache-u79uwXL29TY76Z2rM5mHXA Hi Dan, So this is the alternate interface for bcache tools using mdadm to manage bcache? If so, could you give an example of how to use this? Best regards. Jack 2012/5/12 Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>: > This is a hybrid proposal for supporting bcache as a md device. > Somewhat similar to the v1.x metadata format, where array assembly is > handled in userspace, but managed in the kernel. In the bcache case it > is an "external" metadata format, but then the expectation is that the > kernel "bcache" personality takes over runtime maintenance of the > metadata. > > The container id for bcache is the "cache_set". The subvolume is the > backing device identifier. > > This initial version only supports the runtime static portion of the > superblock, it will need to grow the ability to read the journal to > report the backing devices associated with a given cache set (i.e. in > the superblock backing devices know their cache_set container, but cache > devices need to look elsewhere to find their backing devices). 
> > Cc: Kent Overstreet <koverstreet-hpIqsD4AKlfQT0dZR+AlfA@public.gmane.org> > Signed-off-by: Dan Williams <dan.j.williams-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org> > --- > Assemble.c | 1 > Makefile | 11 + > bcache.h | 98 +++++++++ > crc64.c | 129 +++++++++++ > maps.c | 2 > mdadm.h | 2 > super-bcache.c | 634 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ > util.c | 2 > 8 files changed, 873 insertions(+), 6 deletions(-) > create mode 100644 bcache.h > create mode 100644 crc64.c > create mode 100644 super-bcache.c > > diff --git a/Assemble.c b/Assemble.c > index fd94461..267a2ce 100644 > --- a/Assemble.c > +++ b/Assemble.c > @@ -1594,6 +1594,7 @@ int assemble_container_content(struct supertype *st, int mdfd, > } else switch(content->array.level) { > case LEVEL_LINEAR: > case LEVEL_MULTIPATH: > + case LEVEL_BCACHE: > case 0: > err = sysfs_set_str(content, NULL, "array_state", > "active"); > diff --git a/Makefile b/Makefile > index b8d363f..7886d13 100644 > --- a/Makefile > +++ b/Makefile > @@ -103,8 +103,8 @@ OBJS = mdadm.o config.o policy.o mdstat.o ReadMe.o util.o maps.o lib.o \ > Create.o Detail.o Examine.o Grow.o Monitor.o dlink.o Kill.o Query.o \ > Incremental.o \ > mdopen.o super0.o super1.o super-ddf.o super-intel.o bitmap.o \ > - super-mbr.o super-gpt.o \ > - restripe.o sysfs.o sha1.o mapfile.o crc32.o sg_io.o msg.o \ > + super-mbr.o super-gpt.o super-bcache.o \ > + restripe.o sysfs.o sha1.o mapfile.o crc32.o crc64.o sg_io.o msg.o \ > platform-intel.o probe_roms.o > > CHECK_OBJS = restripe.o sysfs.o maps.o lib.o > @@ -116,8 +116,8 @@ INCL = mdadm.h part.h bitmap.h > MON_OBJS = mdmon.o monitor.o managemon.o util.o maps.o mdstat.o sysfs.o \ > config.o policy.o lib.o \ > Kill.o sg_io.o dlink.o ReadMe.o super0.o super1.o super-intel.o \ > - super-mbr.o super-gpt.o \ > - super-ddf.o sha1.o crc32.o msg.o bitmap.o \ > + super-mbr.o super-gpt.o super-bcache.o \ > + super-ddf.o sha1.o crc32.o crc64.o msg.o bitmap.o \ > platform-intel.o probe_roms.o > > 
MON_SRCS = $(patsubst %.o,%.c,$(MON_OBJS)) > @@ -128,7 +128,8 @@ STATICOBJS = pwgr.o > ASSEMBLE_SRCS := mdassemble.c Assemble.c Manage.c config.c policy.c dlink.c util.c \ > maps.c lib.c \ > super0.c super1.c super-ddf.c super-intel.c sha1.c crc32.c sg_io.c mdstat.c \ > - platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c > + platform-intel.c probe_roms.c sysfs.c super-mbr.c super-gpt.c \ > + super-bcache.c crc64.c > ASSEMBLE_AUTO_SRCS := mdopen.c > ASSEMBLE_FLAGS:= $(CFLAGS) -DMDASSEMBLE > ifdef MDASSEMBLE_AUTO > diff --git a/bcache.h b/bcache.h > new file mode 100644 > index 0000000..765e369 > --- /dev/null > +++ b/bcache.h > @@ -0,0 +1,98 @@ > +#ifndef _BCACHE_H > +#define _BCACHE_H > + > +#include <stdint.h> > + > +#define BITMASK(name, type, field, offset, size) \ > +static inline uint64_t name(const type *k) \ > +{ \ > + uint64_t field = __le64_to_cpu(k->field); \ > + return (field >> offset) & ~(((uint64_t) ~0) << size); \ > +} \ > + \ > +static inline void SET_##name(type *k, uint64_t v) \ > +{ \ > + uint64_t field = __le64_to_cpu(k->field); \ > + field &= ~(~((uint64_t) ~0 << size) << offset); \ > + field |= v << offset; \ > + k->field = __cpu_to_le64(field); \ > +} > + > +static const char bcache_magic[] = { > + 0xc6, 0x85, 0x73, 0xf6, 0x4e, 0x1a, 0x45, 0xca, > + 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81 }; > + > +/* Version 1: Backing dev > + * Version 2: Seed pointer into btree node checksum > + * Version 3: Backing dev superblock has offset of start of data > + */ > + > +#define BCACHE_SB_BDEV_VERSION 3 > +#define BCACHE_SB_MAX_VERSION 3 > + > +#define SB_SECTOR 8 > +#define SB_SIZE 16 /* default data_offset in bcache-tools (?) 
*/ > +#define SB_LABEL_SIZE 32 > + > +struct cache_sb { > + uint64_t csum; > + uint64_t offset; /* sector where this sb was written */ > + uint64_t version; > +#define CACHE_BACKING_DEV 1 > + > + uint8_t magic[16]; > + > + uint8_t uuid[16]; > + union { > + uint8_t set_uuid[16]; > + uint64_t set_magic; > + }; > + uint8_t label[SB_LABEL_SIZE]; > + > + uint64_t flags; > + uint64_t seq; > + uint64_t pad[8]; > + > + uint64_t nbuckets; /* device size */ > + uint16_t block_size; /* sectors */ > + uint16_t bucket_size; /* sectors */ > + > + uint16_t nr_in_set; > + uint16_t nr_this_dev; > + > + uint32_t last_mount; /* time_t */ > + > + uint16_t first_bucket; > + uint16_t keys; /* number of journal buckets */ > + uint64_t d[]; /* journal buckets */ > +}; > + > +static inline int SB_BDEV(struct cache_sb *c) > +{ > + return __le64_to_cpu(c->version) == CACHE_BACKING_DEV; > +} > + > +BITMASK(CACHE_SYNC, struct cache_sb, flags, 0, 1); > +BITMASK(CACHE_DISCARD, struct cache_sb, flags, 1, 1); > +BITMASK(CACHE_REPLACEMENT, struct cache_sb, flags, 2, 3); > + > +BITMASK(BDEV_CACHE_MODE, struct cache_sb, flags, 0, 4); > +#define CACHE_MODE_WRITETHROUGH 0U > +#define CACHE_MODE_WRITEBACK 1U > +#define CACHE_MODE_WRITEAROUND 2U > +#define CACHE_MODE_NONE 3U > +BITMASK(BDEV_STATE, struct cache_sb, flags, 61, 2); > +#define BDEV_STATE_NONE 0U > +#define BDEV_STATE_CLEAN 1U > +#define BDEV_STATE_DIRTY 2U > +#define BDEV_STATE_STALE 3U > + > +inline uint64_t crc64(const void *_data, size_t len); > + > +#define node(i, j) ((void *) ((i)->d + (j))) > +#define end(i) node(i, (i)->keys) > + > +#define csum_set(i) \ > + crc64(((void *) (i)) + 8, ((void *) end(i)) - (((void *) (i)) + 8)) > + > +#endif > diff --git a/crc64.c b/crc64.c > new file mode 100644 > index 0000000..8f37445 > --- /dev/null > +++ b/crc64.c > @@ -0,0 +1,129 @@ > +#define _GNU_SOURCE > + > +#include <stdio.h> > +#include <stdlib.h> > +#include <stdint.h> > +#include <unistd.h> > + > +/* > + * Portions Copyright (c) 1996-2001, 
PostgreSQL Global Development Group (Any > + * use permitted, subject to terms of PostgreSQL license; see.) > + > + * If we have a 64-bit integer type, then a 64-bit CRC looks just like the > + * usual sort of implementation. (See Ross Williams' excellent introduction > + * A PAINLESS GUIDE TO CRC ERROR DETECTION ALGORITHMS, available from > + * ftp://ftp.rocksoft.com/papers/crc_v3.txt or several other net sites.) > + * If we have no working 64-bit type, then fake it with two 32-bit registers. > + * > + * The present implementation is a normal (not "reflected", in Williams' > + * terms) 64-bit CRC, using initial all-ones register contents and a final > + * bit inversion. The chosen polynomial is borrowed from the DLT1 spec > + * (ECMA-182, available from http://www.ecma.ch/ecma1/STAND/ECMA-182.HTM): > + * > + * x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 + > + * x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 + > + * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 + > + * x^7 + x^4 + x + 1 > +*/ > + > +static const uint64_t crc_table[256] = { > + 0x0000000000000000ULL, 0x42F0E1EBA9EA3693ULL, 0x85E1C3D753D46D26ULL, > + 0xC711223CFA3E5BB5ULL, 0x493366450E42ECDFULL, 0x0BC387AEA7A8DA4CULL, > + 0xCCD2A5925D9681F9ULL, 0x8E224479F47CB76AULL, 0x9266CC8A1C85D9BEULL, > + 0xD0962D61B56FEF2DULL, 0x17870F5D4F51B498ULL, 0x5577EEB6E6BB820BULL, > + 0xDB55AACF12C73561ULL, 0x99A54B24BB2D03F2ULL, 0x5EB4691841135847ULL, > + 0x1C4488F3E8F96ED4ULL, 0x663D78FF90E185EFULL, 0x24CD9914390BB37CULL, > + 0xE3DCBB28C335E8C9ULL, 0xA12C5AC36ADFDE5AULL, 0x2F0E1EBA9EA36930ULL, > + 0x6DFEFF5137495FA3ULL, 0xAAEFDD6DCD770416ULL, 0xE81F3C86649D3285ULL, > + 0xF45BB4758C645C51ULL, 0xB6AB559E258E6AC2ULL, 0x71BA77A2DFB03177ULL, > + 0x334A9649765A07E4ULL, 0xBD68D2308226B08EULL, 0xFF9833DB2BCC861DULL, > + 0x388911E7D1F2DDA8ULL, 0x7A79F00C7818EB3BULL, 0xCC7AF1FF21C30BDEULL, > + 0x8E8A101488293D4DULL, 0x499B3228721766F8ULL, 0x0B6BD3C3DBFD506BULL, > 
+ 0x854997BA2F81E701ULL, 0xC7B97651866BD192ULL, 0x00A8546D7C558A27ULL, > + 0x4258B586D5BFBCB4ULL, 0x5E1C3D753D46D260ULL, 0x1CECDC9E94ACE4F3ULL, > + 0xDBFDFEA26E92BF46ULL, 0x990D1F49C77889D5ULL, 0x172F5B3033043EBFULL, > + 0x55DFBADB9AEE082CULL, 0x92CE98E760D05399ULL, 0xD03E790CC93A650AULL, > + 0xAA478900B1228E31ULL, 0xE8B768EB18C8B8A2ULL, 0x2FA64AD7E2F6E317ULL, > + 0x6D56AB3C4B1CD584ULL, 0xE374EF45BF6062EEULL, 0xA1840EAE168A547DULL, > + 0x66952C92ECB40FC8ULL, 0x2465CD79455E395BULL, 0x3821458AADA7578FULL, > + 0x7AD1A461044D611CULL, 0xBDC0865DFE733AA9ULL, 0xFF3067B657990C3AULL, > + 0x711223CFA3E5BB50ULL, 0x33E2C2240A0F8DC3ULL, 0xF4F3E018F031D676ULL, > + 0xB60301F359DBE0E5ULL, 0xDA050215EA6C212FULL, 0x98F5E3FE438617BCULL, > + 0x5FE4C1C2B9B84C09ULL, 0x1D14202910527A9AULL, 0x93366450E42ECDF0ULL, > + 0xD1C685BB4DC4FB63ULL, 0x16D7A787B7FAA0D6ULL, 0x5427466C1E109645ULL, > + 0x4863CE9FF6E9F891ULL, 0x0A932F745F03CE02ULL, 0xCD820D48A53D95B7ULL, > + 0x8F72ECA30CD7A324ULL, 0x0150A8DAF8AB144EULL, 0x43A04931514122DDULL, > + 0x84B16B0DAB7F7968ULL, 0xC6418AE602954FFBULL, 0xBC387AEA7A8DA4C0ULL, > + 0xFEC89B01D3679253ULL, 0x39D9B93D2959C9E6ULL, 0x7B2958D680B3FF75ULL, > + 0xF50B1CAF74CF481FULL, 0xB7FBFD44DD257E8CULL, 0x70EADF78271B2539ULL, > + 0x321A3E938EF113AAULL, 0x2E5EB66066087D7EULL, 0x6CAE578BCFE24BEDULL, > + 0xABBF75B735DC1058ULL, 0xE94F945C9C3626CBULL, 0x676DD025684A91A1ULL, > + 0x259D31CEC1A0A732ULL, 0xE28C13F23B9EFC87ULL, 0xA07CF2199274CA14ULL, > + 0x167FF3EACBAF2AF1ULL, 0x548F120162451C62ULL, 0x939E303D987B47D7ULL, > + 0xD16ED1D631917144ULL, 0x5F4C95AFC5EDC62EULL, 0x1DBC74446C07F0BDULL, > + 0xDAAD56789639AB08ULL, 0x985DB7933FD39D9BULL, 0x84193F60D72AF34FULL, > + 0xC6E9DE8B7EC0C5DCULL, 0x01F8FCB784FE9E69ULL, 0x43081D5C2D14A8FAULL, > + 0xCD2A5925D9681F90ULL, 0x8FDAB8CE70822903ULL, 0x48CB9AF28ABC72B6ULL, > + 0x0A3B7B1923564425ULL, 0x70428B155B4EAF1EULL, 0x32B26AFEF2A4998DULL, > + 0xF5A348C2089AC238ULL, 0xB753A929A170F4ABULL, 0x3971ED50550C43C1ULL, > + 0x7B810CBBFCE67552ULL, 
0xBC902E8706D82EE7ULL, 0xFE60CF6CAF321874ULL, > + 0xE224479F47CB76A0ULL, 0xA0D4A674EE214033ULL, 0x67C58448141F1B86ULL, > + 0x253565A3BDF52D15ULL, 0xAB1721DA49899A7FULL, 0xE9E7C031E063ACECULL, > + 0x2EF6E20D1A5DF759ULL, 0x6C0603E6B3B7C1CAULL, 0xF6FAE5C07D3274CDULL, > + 0xB40A042BD4D8425EULL, 0x731B26172EE619EBULL, 0x31EBC7FC870C2F78ULL, > + 0xBFC9838573709812ULL, 0xFD39626EDA9AAE81ULL, 0x3A28405220A4F534ULL, > + 0x78D8A1B9894EC3A7ULL, 0x649C294A61B7AD73ULL, 0x266CC8A1C85D9BE0ULL, > + 0xE17DEA9D3263C055ULL, 0xA38D0B769B89F6C6ULL, 0x2DAF4F0F6FF541ACULL, > + 0x6F5FAEE4C61F773FULL, 0xA84E8CD83C212C8AULL, 0xEABE6D3395CB1A19ULL, > + 0x90C79D3FEDD3F122ULL, 0xD2377CD44439C7B1ULL, 0x15265EE8BE079C04ULL, > + 0x57D6BF0317EDAA97ULL, 0xD9F4FB7AE3911DFDULL, 0x9B041A914A7B2B6EULL, > + 0x5C1538ADB04570DBULL, 0x1EE5D94619AF4648ULL, 0x02A151B5F156289CULL, > + 0x4051B05E58BC1E0FULL, 0x87409262A28245BAULL, 0xC5B073890B687329ULL, > + 0x4B9237F0FF14C443ULL, 0x0962D61B56FEF2D0ULL, 0xCE73F427ACC0A965ULL, > + 0x8C8315CC052A9FF6ULL, 0x3A80143F5CF17F13ULL, 0x7870F5D4F51B4980ULL, > + 0xBF61D7E80F251235ULL, 0xFD913603A6CF24A6ULL, 0x73B3727A52B393CCULL, > + 0x31439391FB59A55FULL, 0xF652B1AD0167FEEAULL, 0xB4A25046A88DC879ULL, > + 0xA8E6D8B54074A6ADULL, 0xEA16395EE99E903EULL, 0x2D071B6213A0CB8BULL, > + 0x6FF7FA89BA4AFD18ULL, 0xE1D5BEF04E364A72ULL, 0xA3255F1BE7DC7CE1ULL, > + 0x64347D271DE22754ULL, 0x26C49CCCB40811C7ULL, 0x5CBD6CC0CC10FAFCULL, > + 0x1E4D8D2B65FACC6FULL, 0xD95CAF179FC497DAULL, 0x9BAC4EFC362EA149ULL, > + 0x158E0A85C2521623ULL, 0x577EEB6E6BB820B0ULL, 0x906FC95291867B05ULL, > + 0xD29F28B9386C4D96ULL, 0xCEDBA04AD0952342ULL, 0x8C2B41A1797F15D1ULL, > + 0x4B3A639D83414E64ULL, 0x09CA82762AAB78F7ULL, 0x87E8C60FDED7CF9DULL, > + 0xC51827E4773DF90EULL, 0x020905D88D03A2BBULL, 0x40F9E43324E99428ULL, > + 0x2CFFE7D5975E55E2ULL, 0x6E0F063E3EB46371ULL, 0xA91E2402C48A38C4ULL, > + 0xEBEEC5E96D600E57ULL, 0x65CC8190991CB93DULL, 0x273C607B30F68FAEULL, > + 0xE02D4247CAC8D41BULL, 0xA2DDA3AC6322E288ULL, 
0xBE992B5F8BDB8C5CULL, > + 0xFC69CAB42231BACFULL, 0x3B78E888D80FE17AULL, 0x7988096371E5D7E9ULL, > + 0xF7AA4D1A85996083ULL, 0xB55AACF12C735610ULL, 0x724B8ECDD64D0DA5ULL, > + 0x30BB6F267FA73B36ULL, 0x4AC29F2A07BFD00DULL, 0x08327EC1AE55E69EULL, > + 0xCF235CFD546BBD2BULL, 0x8DD3BD16FD818BB8ULL, 0x03F1F96F09FD3CD2ULL, > + 0x41011884A0170A41ULL, 0x86103AB85A2951F4ULL, 0xC4E0DB53F3C36767ULL, > + 0xD8A453A01B3A09B3ULL, 0x9A54B24BB2D03F20ULL, 0x5D45907748EE6495ULL, > + 0x1FB5719CE1045206ULL, 0x919735E51578E56CULL, 0xD367D40EBC92D3FFULL, > + 0x1476F63246AC884AULL, 0x568617D9EF46BED9ULL, 0xE085162AB69D5E3CULL, > + 0xA275F7C11F7768AFULL, 0x6564D5FDE549331AULL, 0x279434164CA30589ULL, > + 0xA9B6706FB8DFB2E3ULL, 0xEB46918411358470ULL, 0x2C57B3B8EB0BDFC5ULL, > + 0x6EA7525342E1E956ULL, 0x72E3DAA0AA188782ULL, 0x30133B4B03F2B111ULL, > + 0xF7021977F9CCEAA4ULL, 0xB5F2F89C5026DC37ULL, 0x3BD0BCE5A45A6B5DULL, > + 0x79205D0E0DB05DCEULL, 0xBE317F32F78E067BULL, 0xFCC19ED95E6430E8ULL, > + 0x86B86ED5267CDBD3ULL, 0xC4488F3E8F96ED40ULL, 0x0359AD0275A8B6F5ULL, > + 0x41A94CE9DC428066ULL, 0xCF8B0890283E370CULL, 0x8D7BE97B81D4019FULL, > + 0x4A6ACB477BEA5A2AULL, 0x089A2AACD2006CB9ULL, 0x14DEA25F3AF9026DULL, > + 0x562E43B4931334FEULL, 0x913F6188692D6F4BULL, 0xD3CF8063C0C759D8ULL, > + 0x5DEDC41A34BBEEB2ULL, 0x1F1D25F19D51D821ULL, 0xD80C07CD676F8394ULL, > + 0x9AFCE626CE85B507ULL > +}; > + > +inline uint64_t crc64(const void *_data, size_t len) > +{ > + uint64_t crc = 0xFFFFFFFFFFFFFFFFULL; > + const unsigned char *data = _data; > + > + while (len--) { > + int i = ((int) (crc >> 56) ^ *data++) & 0xFF; > + crc = crc_table[i] ^ (crc << 8); > + } > + > + return crc ^ 0xFFFFFFFFFFFFFFFFULL; > +} > diff --git a/maps.c b/maps.c > index f2ba9a7..cedf548 100644 > --- a/maps.c > +++ b/maps.c > @@ -94,6 +94,8 @@ mapping_t pers[] = { > { "10", 10}, > { "faulty", LEVEL_FAULTY}, > { "container", LEVEL_CONTAINER}, > + { "bcache", LEVEL_BCACHE}, > + { "11", LEVEL_BCACHE}, > { NULL, 0} > }; > > diff --git a/mdadm.h 
b/mdadm.h > index 3bcd052..a0ccff6 100644 > --- a/mdadm.h > +++ b/mdadm.h > @@ -816,6 +816,7 @@ extern struct superswitch { > extern struct superswitch super0, super1; > extern struct superswitch super_imsm, super_ddf; > extern struct superswitch mbr, gpt; > +extern struct superswitch super_bcache; > > struct metadata_update { > int len; > @@ -1296,6 +1297,7 @@ static inline int xasprintf(char **strp, const char *fmt, ...) { > #define LEVEL_MULTIPATH (-4) > #define LEVEL_LINEAR (-1) > #define LEVEL_FAULTY (-5) > +#define LEVEL_BCACHE (0xb) > > /* kernel module doesn't know about these */ > #define LEVEL_CONTAINER (-100) > diff --git a/super-bcache.c b/super-bcache.c > new file mode 100644 > index 0000000..ec8f3db > --- /dev/null > +++ b/super-bcache.c > @@ -0,0 +1,634 @@ > +/* > + * mdadm - bcache support > + * > + * Copyright (C) 2012 Intel Corporation > + * > + * bcache definitions copied from bcache-tools: > + * git://evilpiepirate.org/~kent/bcache-tools.git > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms and conditions of the GNU General Public License, > + * version 2, as published by the Free Software Foundation. > + * > + * This program is distributed in the hope it will be useful, but WITHOUT > + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or > + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for > + * more details. > + * > + * You should have received a copy of the GNU General Public License along with > + * this program; if not, write to the Free Software Foundation, Inc., > + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
> + */ > +#define HAVE_STDINT_H 1 > +#include "mdadm.h" > +#include "bcache.h" > + > +struct bcache_super { > + union { > + struct cache_sb *sb; > + void *buf; > + }; > + struct dl { > + int major, minor; > + char *devname; > + int fd; > + } *disk; > + int vol; > + struct bcache_super *next; > +}; > + > +enum { > + /* FIXME this is a function of the bucket size */ > + BCACHE_MAX_DEVICES = 2, > +}; > + > +static int load_cache_sb(struct bcache_super *super, int keep_fd) > +{ > + struct dl *d = super->disk; > + int rc, fd = d->fd; > + struct cache_sb *c; > + struct stat s; > + > + if (!keep_fd) > + d->fd = -1; > + > + rc = fstat(fd, &s); > + if (rc) > + return rc; > + d->major = major(s.st_rdev); > + d->minor = minor(s.st_rdev); > + > + rc = posix_memalign(&super->buf, 4096, 4096); > + if (rc) > + return rc; > + c = super->sb; > + > + if (pread(fd, c, 4096, SB_SECTOR << 9) != 4096) > + return errno; > + > + if (csum_set(c) != __le64_to_cpu(c->csum)) > + return ENODEV; > + > + if (memcmp(c->magic, bcache_magic, sizeof(bcache_magic)) != 0) > + return ENODEV; > + > + return 0; > +} > + > +static void __free_bcache(struct bcache_super *super) > +{ > + if (!super) > + return; > + > + while (super) { > + struct bcache_super *next = super->next; > + struct dl *d = super->disk; > + > + d = super->disk; > + if (d->fd >= 0) > + close(d->fd); > + free(d->devname); > + free(d); > + free(super->sb); > + free(super); > + super = next; > + } > +} > + > +static void free_bcache(struct supertype *st) > +{ > + struct bcache_super *super = st->sb; > + > + __free_bcache(super); > + st->sb = NULL; > +} > + > +#ifndef MDASSEMBLE > +static void examine_bcache(struct supertype *st, char *homehost) > +{ > + const char *const cache_policies[] = { "lru", "fifo", "random", "" }; > + const char *const bdev_states[] = { "none", "clean", "dirty", "stale" }; > + const char *const bdev_modes[16] = { "writethrough", "writeback", "writearound", "none" }; > + struct bcache_super *super = st->sb; > + 
uint16_t first_bucket, bucket_size; > + struct cache_sb *c = super->sb; > + uint64_t nbuckets, csum; > + unsigned long long sz; > + char nbuf[64]; > + > + printf(" Magic : %s\n", > + memcmp(bcache_magic, c->magic, 16) ? "<unknown>" : "<bcache>"); > + printf(" Version : %d\n", (int) c->version); > + printf(" Role : %s\n", SB_BDEV(c) ? "backing-device" : "cache"); > + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); > + printf(" Set UUID : %s\n", nbuf + 5); > + __fname_from_uuid((int *) c->uuid, 0, nbuf, ':'); > + printf(" Cache Devs : %u\n", c->nr_in_set); > + /* FIXME: list all cache dev uuids in the load_container case */ > + printf(" Device UUID : %s\n", nbuf + 5); > + printf(" Flags :%s%s\n", CACHE_DISCARD(c) ? " discard" : "", > + CACHE_SYNC(c) ? " sync" : ""); > + if (SB_BDEV(c)) { > + printf(" State : %s\n", bdev_states[BDEV_STATE(c)]); > + printf(" Mode : %s\n", bdev_modes[BDEV_CACHE_MODE(c)]); > + } else { > + printf(" Policy : %s\n", cache_policies[CACHE_REPLACEMENT(c)]); > + /* FIXME: add reporting of backing device uuids in the cache caase */ > + } > + printf(" Label : %.32s\n", c->label); > + csum = __le64_to_cpu(c->csum); > + nbuckets = __le64_to_cpu(c->nbuckets); > + bucket_size = __le16_to_cpu(c->bucket_size); > + first_bucket = __le16_to_cpu(c->first_bucket); > + sz = (nbuckets - first_bucket) * bucket_size; > + printf(" Device Size : %llu%s\n", sz, human_size(sz * 512)); > + printf(" Bucket Size : %u\n", bucket_size); > + printf(" Num Buckets : %llu\n", (unsigned long long) nbuckets); > + printf(" this dev : %u\n", __le16_to_cpu(c->nr_this_dev)); > + printf("First Bucket : %u\n", first_bucket); > + printf(" Checksum : %llx %s\n", (unsigned long long) csum, > + csum == csum_set(c) ? 
"correct" : "incorrect"); > +} > + > +static void brief_examine_bcache(struct supertype *st, int verbose) > +{ > + struct bcache_super *super = st->sb; > + struct cache_sb *c = super->sb; > + char nbuf[64]; > + > + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); > + printf("ARRAY metadata=bcache UUID=%s\n", nbuf + 5); > +} > + > +static void brief_examine_subarrays_bcache(struct supertype *st, int verbose) > +{ > + struct bcache_super *super = st->sb; > + struct cache_sb *c = super->sb; > + char nbuf[64], nbuf1[64]; > + > + /* FIXME this needs to parse the cache device journal to find > + * and report the backing dev uuid list > + */ > + if (!SB_BDEV(c)) > + return; > + > + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); > + __fname_from_uuid((int *) c->uuid, 0, nbuf1, ':'); > + > + printf("ARRAY container=%s UUID=%s\n", nbuf + 5, nbuf1 + 5); > +} > + > +static void export_examine_bcache(struct supertype *st) > +{ > + struct bcache_super *super = st->sb; > + struct cache_sb *c = super->sb; > + char nbuf[64]; > + > + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); > + printf("MD_METADATA=bcache\n"); > + printf("MD_LEVEL=container\n"); > + printf("MD_UUID=%s\n", nbuf+5); > + printf("MD_DEVICES=%d\n", __le16_to_cpu(c->nr_in_set) + 1); > +} > + > +static void detail_bcache(struct supertype *st, char *homehost) > +{ > + struct bcache_super *super = st->sb; > + struct cache_sb *c = super->sb; > + char nbuf[64]; > + > + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); > + printf("\n UUID : %s\n", nbuf + 5); > +} > + > +static void brief_detail_bcache(struct supertype *st) > +{ > + struct bcache_super *super = st->sb; > + struct cache_sb *c = super->sb; > + char nbuf[64]; > + > + __fname_from_uuid((int *) c->set_uuid, 0, nbuf, ':'); > + printf(" UUID=%s", nbuf + 5); > +} > + > +static struct bcache_super *alloc_super(const char *func) > +{ > + struct bcache_super *super = calloc(1, sizeof(*super)); > + struct dl *d = calloc(1, sizeof(*d)); > + > + if 
(!super || !d) { > + fprintf(stderr, Name "%s: %s failed\n", func, __func__); > + free(super); > + free(d); > + return NULL; > + } > + > + super->vol = -1; > + super->disk = d; > + > + return super; > +} > + > +static int load_container_bcache(struct supertype *st, int fd, char *devname) > +{ > + struct bcache_super *list = NULL; > + int rc, i, cdev = 0, bdev = 0; > + int devnum = fd2devnum(fd); > + struct mdinfo *sra, *sd; > + > + sra = sysfs_read(fd, 0, GET_LEVEL|GET_VERSION|GET_DEVS|GET_STATE); > + if (!sra) > + return 1; > + > + if (sra->array.major_version != -1 || > + sra->array.minor_version != -2 || > + strcmp(sra->text_version, "bcache") != 0) { > + rc = 1; > + goto error; > + } > + > + for (sd = sra->devs, i = 0; sd; sd = sd->next, i++) { > + struct bcache_super *super = alloc_super(__func__); > + struct cache_sb *c; > + char nm[32]; > + int fd; > + > + rc = 1; > + if (!super) > + goto error; > + super->next = list; > + list = super; > + > + rc = 2; > + sprintf(nm, "%d:%d", sd->disk.major, sd->disk.minor); > + fd = dev_open(nm, O_RDWR); > + if (fd < 0) > + goto error; > + > + super->disk->fd = fd; > + rc = load_cache_sb(super, 1); > + if (rc) > + goto error; > + c = super->sb; > + if (SB_BDEV(c)) > + bdev++; > + else > + cdev++; > + } > + rc = 0; > + > + /* FIXME disambiguate multiple bdevs per set, support multiple > + * cache devices > + */ > + if (bdev > 1) { > + fprintf(stderr, Name ": %d backing devices detected\n", bdev); > + rc = 3; > + } > + if (cdev > 1) { > + fprintf(stderr, Name ": %d cache devices detected\n", cdev); > + rc = 3; > + } > + if (rc) > + goto error; > + st->sb = list; > + list = NULL; > + > +error: > + if (list) > + __free_bcache(list); > + sysfs_free(sra); > + > + st->container_dev = devnum; > + if (rc == 0 && st->ss == NULL) { > + st->ss = &super_bcache; > + st->minor_version = 0; > + st->max_devs = BCACHE_MAX_DEVICES; > + } > + return rc; > +} > +#endif > + > +static int load_bcache(struct supertype *st, int fd, char *devname) 
> +{ > + struct bcache_super *super; > + struct dl *d; > + int rc; > + > + free_bcache(st); > + > + super = alloc_super(__func__); > + if (!super) > + return 1; > + > + st->sb = super; > + d = super->disk; > + d->devname = devname ? strdup(devname) : NULL; > + d->fd = fd; > + rc = load_cache_sb(super, 0); > + if (rc) { > + free_bcache(st); > + if (!devname) > + return rc; > + fprintf(stderr, Name ": %s failed on %s (%s)\n", __func__, > + devname, strerror(rc)); > + return rc; > + } > + > + if (st->ss == NULL) { > + st->ss = &super_bcache; > + st->minor_version = 0; > + st->max_devs = BCACHE_MAX_DEVICES; > + } > + > + return 0; > +} > + > +static int store_bcache(struct supertype *st, int fd) > +{ > + struct bcache_super *super = st->sb; > + struct cache_sb *c = super->sb; > + > + if (!c) > + return 1; > + > + if (pwrite(fd, c, sizeof(*c), SB_SECTOR << 9) != sizeof(*c)) > + return 1; > + > + return 0; > +} > + > +static int compare_bcache(struct supertype *st, struct supertype *tst) > +{ > + struct bcache_super *a = st->sb; > + struct bcache_super *b = tst->sb; > + > + if (!st->sb) { > + st->sb = tst->sb; > + tst->sb = NULL; > + return 0; > + } > + > + if (memcmp(a->sb->set_uuid, b->sb->set_uuid, sizeof(b->sb->set_uuid)) != 0) > + return 2; > + > + return 0; > +} > + > +static __u64 avail_size_bcache(struct supertype *st, __u64 devsize) > +{ > + /* 4k from start, 8k min data offset */ > + const uint32_t reserved_sectors = (4+8) * 2; > + > + if (devsize < reserved_sectors) > + return 0; > + > + return devsize - reserved_sectors; > +} > + > +static struct supertype *match_metadata_desc_bcache(char *arg) > +{ > + struct supertype *st; > + > + if (strcmp(arg, "bcache") != 0 && > + strcmp(arg, "default") != 0) > + return NULL; > + > + st = calloc(1, sizeof(*st)); > + if (!st) > + return NULL; > + st->container_dev = NoMdDev; > + st->ss = &super_bcache; > + st->max_devs = BCACHE_MAX_DEVICES; > + st->minor_version = 0; > + st->sb = NULL; > + > + return st; > +} > + > 
+static int match_home_bcache(struct supertype *st, char *homehost) > +{ > + /* the bcache superblock does not specify any host > + * identification information. maybe it should... > + */ > + > + return -1; > +} > + > +static void uuid_from_bcache(struct supertype *st, int uuid[4]) > +{ > + struct bcache_super *super = st->sb; > + struct cache_sb *c = super->sb; > + > + memcpy(uuid, c->set_uuid, sizeof(c->set_uuid)); > +} > + > +static void getinfo_bcache_volume(struct supertype *st, struct mdinfo *info, int map_disks, char *dmap) > +{ > + char *name = devnum2devname(st->container_dev); > + struct bcache_super *super = st->sb; > + uint16_t bucket_size, first_bucket; > + struct cache_sb *c = super->sb; > + unsigned long long sz; > + uint64_t nbuckets; > + > + nbuckets = __le64_to_cpu(c->nbuckets); > + bucket_size = __le16_to_cpu(c->bucket_size); > + first_bucket = __le16_to_cpu(c->first_bucket); > + sz = (nbuckets - first_bucket) * bucket_size; > + > + info->container_member = super->vol; > + info->custom_array_size = sz; > + info->component_size = sz; > + info->recovery_start = MaxSector; > + info->data_offset = SB_SECTOR + SB_SIZE; > + sprintf(info->text_version, "/%s/%d", name, super->vol); > + snprintf(info->name, sizeof(info->name), "%s", c->label); > + memcpy(info->uuid, c->uuid, sizeof(c->uuid)); > + > + info->array.raid_disks = __le16_to_cpu(c->nr_in_set) + 1; > + info->array.level = LEVEL_BCACHE; > + info->array.layout = 0; > + info->array.md_minor = -1; > + info->array.ctime = 0; > + info->array.utime = 0; > + info->array.chunk_size = bucket_size * 512; > + info->array.major_version = -1; > + info->array.minor_version = -2; > + > + info->disk.major = 0; > + info->disk.minor = 0; > + info->disk.raid_disk = SB_BDEV(c); > + info->disk.number = SB_BDEV(c); > + info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC; > +} > + > +static void getinfo_bcache(struct supertype *st, struct mdinfo *info, char *dmap) > +{ > + int i, cset, bdev, map_disks = 
info->array.raid_disks; > + struct bcache_super *super = st->sb; > + struct cache_sb *c = super->sb; > + > + memset(info, 0, sizeof(*info)); > + > + if (super->vol >= 0) > + return getinfo_bcache_volume(st, info, map_disks, dmap); > + > + /* make Assemble choose the cache target */ > + info->events = SB_BDEV(c); > + info->recovery_start = MaxSector; > + info->data_offset = SB_SECTOR; > + info->component_size = SB_SIZE; > + strcpy(info->text_version, "bcache"); > + memcpy(info->uuid, c->set_uuid, sizeof(c->set_uuid)); > + > + info->array.raid_disks = __le16_to_cpu(c->nr_in_set) + 1; > + info->array.level = LEVEL_CONTAINER; > + info->array.layout = 0; > + info->array.md_minor = -1; > + info->array.ctime = 0; > + info->array.utime = 0; > + info->array.chunk_size = __le16_to_cpu(c->bucket_size) * 512; > + info->array.major_version = -1; > + info->array.minor_version = -2; > + > + info->disk.major = 0; > + info->disk.minor = 0; > + info->disk.raid_disk = SB_BDEV(c); > + info->disk.number = SB_BDEV(c); > + /* FIXME: need bcache superblock to identify failed devices */ > + info->disk.state = 1 << MD_DISK_ACTIVE | 1 << MD_DISK_SYNC; > + > + /* FIXME need to parse the journal uuid_bucket to understand > + * which cache devs are consistent with the set > + */ > + for (i = 0; dmap && i < map_disks; i++) > + dmap[i] = 1; > + > + cset = 0; > + bdev = 0; > + while (super) { > + c = super->sb; > + > + /* FIXME filter out-of-sync devices */ > + if (SB_BDEV(c)) > + bdev++; > + else > + cset++; > + super = super->next; > + } > + > + if (cset + bdev == __le16_to_cpu(c->nr_in_set) + 1) > + info->container_enough = 1; > + else > + info->container_enough = -1; > +} > + > +static int update_bcache(struct supertype *st, struct mdinfo *i, char *update, > + char *devname, int verbose, int uuid_set, char *homehost) > +{ > + /* FIXME */ > + if (strcmp(update, "grow") == 0) { > + return 0; > + } else if (strcmp(update, "resync") == 0) { > + return 0; > + } else if (strcmp(update, "homehost") 
== 0) { > + return -1; > + } else if (strcmp(update, "name") == 0) { > + return -1; > + } else if (strcmp(update, "_reshape_progress") == 0) { > + return 0; > + } else if (strcmp(update, "assemble") == 0 ) { > + return 0; > + } else { > + return -1; > + } > +} > + > +static struct mdinfo *container_content_bcache(struct supertype *st, char *subarray) > +{ > + struct bcache_super *super = st->sb; > + struct mdinfo *info, *disk = NULL; > + char *ep; > + > + info = calloc(1, sizeof(*info)); > + if (!info) { > + fprintf(stderr, Name ": failed to allocate %zu bytes\n", > + sizeof(*info)); > + return NULL; > + } > + > + /* don't support multiple backing disks per cache set */ > + if (subarray && (strtoul(subarray, &ep, 10) > 0 || *ep != '\0')) > + goto error; > + > + super->vol = 0; > + getinfo_bcache(st, info, NULL); > + > + for (; super; super = super->next) { > + struct dl *d = super->disk; > + struct cache_sb *c = super->sb; > + > + disk = calloc(1, sizeof(*disk)); > + if (!disk) { > + fprintf(stderr, Name ": failed to allocate disk\n"); > + goto error; > + } > + disk->next = info->devs; > + info->devs = disk; > + > + disk->disk.number = SB_BDEV(c); > + disk->disk.raid_disk = SB_BDEV(c); > + disk->disk.major = d->major; > + disk->disk.minor = d->minor; > + disk->recovery_start = MaxSector; > + disk->disk.state = 1 << MD_DISK_ACTIVE; > + disk->data_offset = info->data_offset; > + disk->component_size = info->component_size; > + > + info->array.working_disks++; > + } > + > + return info; > + > + error: > + disk = info->devs; > + while (disk) { > + struct mdinfo *next = disk->next; > + > + free(disk); > + disk = next; > + } > + > + free(info); > + return NULL; > +} > + > + > +struct superswitch super_bcache = { > +#ifndef MDASSEMBLE > + .examine_super = examine_bcache, > + .brief_examine_super = brief_examine_bcache, > + .brief_examine_subarrays = brief_examine_subarrays_bcache, > + .export_examine_super = export_examine_bcache, > + .detail_super = detail_bcache, > + 
.brief_detail_super = brief_detail_bcache, > + .load_container = load_container_bcache, > +#endif > + .match_home = match_home_bcache, > + .uuid_from_super = uuid_from_bcache, > + .getinfo_super = getinfo_bcache, > + .update_super = update_bcache, > + > + .avail_size = avail_size_bcache, > + > + .compare_super = compare_bcache, > + > + .load_super = load_bcache, > + .store_super = store_bcache, > + .free_super = free_bcache, > + .match_metadata_desc = match_metadata_desc_bcache, > + .container_content = container_content_bcache, > + > + .external = 1, > + .name = "bcache", > +}; > diff --git a/util.c b/util.c > index 6985a70..d9e49cf 100644 > --- a/util.c > +++ b/util.c > @@ -919,7 +919,7 @@ struct superswitch *superlist[] = > { > &super0, &super1, > &super_ddf, &super_imsm, > - &mbr, &gpt, > + &mbr, &gpt, &super_bcache, > NULL }; > > #if !defined(MDASSEMBLE) || defined(MDASSEMBLE) && defined(MDASSEMBLE_AUTO) > > -- > To unsubscribe from this list: send the line "unsubscribe linux-bcache" in > the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org > More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [mdadm PATCH] bcache: add bcache superblock [not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org> 2012-05-12 7:38 ` Jack Wang @ 2012-05-15 0:04 ` Mark Hills [not found] ` <alpine.LNX.2.01.1205150042200.12473-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org> 1 sibling, 1 reply; 6+ messages in thread From: Mark Hills @ 2012-05-15 0:04 UTC (permalink / raw) To: Dan Williams Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA, linux-raid-u79uwXL29TY76Z2rM5mHXA, linux-bcache-u79uwXL29TY76Z2rM5mHXA On Fri, 11 May 2012, Dan Williams wrote: > This is a hybrid proposal for supporting bcache as a md device. > Somewhat similar to the v1.x metadata format, where array assembly is > handled in userspace, but managed in the kernel. In the bcache case it > is an "external" metadata format, but then the expectation is that the > kernel "bcache" personality takes over runtime maintenance of the > metadata. I am having some trouble with this, can you clarify (perhaps by example) how to create a pairing of a cache and backing device? I tried creating directly: # mdadm --create /dev/md0 --level=11 --raid-devices=2 /dev/sdb /dev/sdc mdadm: unknown level 11 But as the code in Assemble.c is patched (and not Create.c), I had a hunch to format the devices as before then assemble the array: # make-bcache -C /dev/sdb # make-bcache -B /dev/sdc # mdadm -A /dev/md0 /dev/sdb /dev/sdc mdadm: Cannot assemble mbr metadata on /dev/sdb mdadm: /dev/sdb has no superblock - assembly aborted For a hack, I changed Create.c to accept case 11 and leave the chosen chunk size. 
It got further, but then failed with: # mdadm --create /dev/md/bcache --level=11 --raid-devices=2 /dev/sdb /dev/sdc mdadm: /dev/sdb appears to be part of a raid array: level=raid0 devices=0 ctime=Thu Jan 1 01:00:00 1970 mdadm: partition table exists on /dev/sdb but will be lost or meaningless after creating array mdadm: /dev/sdc appears to be part of a raid array: level=raid0 devices=0 ctime=Thu Jan 1 01:00:00 1970 mdadm: partition table exists on /dev/sdc but will be lost or meaningless after creating array mdadm: largest drive (/dev/sdc) exceeds size (117155216K) by more than 1% Continue creating array? y mdadm: Defaulting to version 1.2 metadata mdadm: RUN_ARRAY failed: Cannot allocate memory Maybe I missed something basic here, but I'm afraid I can't see what? If not, hopefully this information is useful. I use bcache v13 patches and the MD conversion, and this patch to mdadm. $ lsmod | grep bcache md_bcache 3202 0 bcache 138187 1 md_bcache md_mod 88671 5 md_bcache,raid456,raid1 $ uname -a Linux stax 3.4.0-rc7-mh+ #190 SMP PREEMPT Sun May 13 22:36:17 BST 2012 i686 Intel(R) Pentium(R) D CPU 2.80GHz GenuineIntel GNU/Linux Thanks -- Mark ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <alpine.LNX.2.01.1205150042200.12473-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>]
* Re: [mdadm PATCH] bcache: add bcache superblock [not found] ` <alpine.LNX.2.01.1205150042200.12473-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org> @ 2012-05-15 16:59 ` Dan Williams [not found] ` <CABE8wwu_TXJckRXg1eSny8TnciJ6GOM_Vy7FNW1FX84YT6QZzQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 0 siblings, 1 reply; 6+ messages in thread From: Dan Williams @ 2012-05-15 16:59 UTC (permalink / raw) To: Mark Hills Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA, linux-raid-u79uwXL29TY76Z2rM5mHXA, linux-bcache-u79uwXL29TY76Z2rM5mHXA On Mon, May 14, 2012 at 5:04 PM, Mark Hills <mark-UrrBsZIrrsb10XsdtD+oqA@public.gmane.org> wrote: > On Fri, 11 May 2012, Dan Williams wrote: > >> This is a hybrid proposal for supporting bcache as a md device. >> Somewhat similar to the v1.x metadata format, where array assembly is >> handled in userspace, but managed in the kernel. In the bcache case it >> is an "external" metadata format, but then the expectation is that the >> kernel "bcache" personality takes over runtime maintenance of the >> metadata. > > I am having some trouble with this, can you clarify (perhaps by example) > how to create a pairing of a cache and backing device? > > I tried creating directly: > > # mdadm --create /dev/md0 --level=11 --raid-devices=2 /dev/sdb /dev/sdc > mdadm: unknown level 11 > > But as the code in Assemble.c is patched (and not Create.c), I had a hunch > to format the devices as before then assemble the array: Yeah, "create" support is not there yet. > # make-bcache -C /dev/sdb > # make-bcache -B /dev/sdc > # mdadm -A /dev/md0 /dev/sdb /dev/sdc > mdadm: Cannot assemble mbr metadata on /dev/sdb I should have been more explicit in the changelog. This current patch was only tested to assemble an existing bcache configuration. I.e. it assumes the backing device has been attached to the cache set at least once. By default make-bcache always creates a new cache-set id per invocation. 
So in the above example it won't find sdb and sdc belong to the same md device because the cache-set id's differ. You can verify this with mdadm -E. [..] > > Maybe I missed something basic here, but I'm afraid can't see what? If > not, hopefully this information is useful. > > I use bcache v13 patches and the MD conversion, and this patch to mdadm. For reference: [root@fedora-virt ~]# mdadm -E /dev/vd[bc] /dev/vdb: Magic : <bcache> Version : 3 Role : cache Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1 Cache Devs : 1 Device UUID : a1b25e26:5a4c4db6:2716c1aa:eba44ec7 Flags : sync Policy : lru Label : Device Size : 16776192 (8.00 GiB 8.59 GB) Bucket Size : 1024 Num Buckets : 16384 this dev : 0 First Bucket : 1 Checksum : 45fa97cda53b0b6 correct /dev/vdc: Magic : <bcache> Version : 1 Role : backing-device Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1 Cache Devs : 1 Device UUID : 7045ff87:aa4b13c4:670d8bb2:0b4ad628 Flags : sync State : clean Mode : writeback Label : Device Size : 16776192 (8.00 GiB 8.59 GB) Bucket Size : 1024 Num Buckets : 16384 this dev : 0 First Bucket : 1 Checksum : f36910e51451d91c correct [root@fedora-virt ~]# mdadm -Avvv /dev/md/bcache /dev/vd[bc] mdadm: looking for devices for /dev/md/bcache mdadm: /dev/vdb is identified as a member of /dev/md/bcache, slot 0. mdadm: /dev/vdc is identified as a member of /dev/md/bcache, slot 1. mdadm: added /dev/vdb to /dev/md/bcache as 0 mdadm: added /dev/vdc to /dev/md/bcache as 1 mdadm: Container /dev/md/bcache has been assembled with 2 drives [root@fedora-virt ~]# cat /proc/mdstat Personalities : [bcache] md126 : active bcache vdb[1] vdc[0] 8388096 blocks super external:/md127/0 8388096k cache-blocks md127 : inactive vdc[1](S) vdb[0](S) 16 blocks super external:bcache unused devices: <none> ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <CABE8wwu_TXJckRXg1eSny8TnciJ6GOM_Vy7FNW1FX84YT6QZzQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>]
* Re: [mdadm PATCH] bcache: add bcache superblock [not found] ` <CABE8wwu_TXJckRXg1eSny8TnciJ6GOM_Vy7FNW1FX84YT6QZzQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> @ 2012-05-15 21:16 ` Mark Hills [not found] ` <alpine.LNX.2.01.1205152200500.13405-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org> 0 siblings, 1 reply; 6+ messages in thread From: Mark Hills @ 2012-05-15 21:16 UTC (permalink / raw) To: Dan Williams Cc: neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA, linux-raid-u79uwXL29TY76Z2rM5mHXA, linux-bcache-u79uwXL29TY76Z2rM5mHXA [-- Attachment #1: Type: TEXT/PLAIN, Size: 3396 bytes --] On Tue, 15 May 2012, Dan Williams wrote: > On Mon, May 14, 2012 at 5:04 PM, Mark Hills <mark-UrrBsZIrrsb10XsdtD+oqA@public.gmane.org> wrote: > > On Fri, 11 May 2012, Dan Williams wrote: > > > >> This is a hybrid proposal for supporting bcache as a md device. > >> Somewhat similar to the v1.x metadata format, where array assembly is > >> handled in userspace, but managed in the kernel. In the bcache case it > >> is an "external" metadata format, but then the expectation is that the > >> kernel "bcache" personality takes over runtime maintenance of the > >> metadata. > > > > I am having some trouble with this, can you clarify (perhaps by example) > > how to create a pairing of a cache and backing device? [...] > > # make-bcache -C /dev/sdb > > # make-bcache -B /dev/sdc > > # mdadm -A /dev/md0 /dev/sdb /dev/sdc > > mdadm: Cannot assemble mbr metadata on /dev/sdb > > I should have been more explicit in the changelog. This current patch > was only tested to assemble an existing bcache configuration. I.e. it > assumes the backing device has been attached to the cache set at least > once. By default make-bcache always creates a new cache-set id per > invocation. So in the above example it won't find sdb and sdc belong > to the same md device because the cache-set id's differ. You can > verify this with mdadm -E. Thanks for explaining. 
I suppose this is just especially awkward at the moment because attaching the devices can't be done without a different kernel. So it will be good to see the "create" support, when it is ready. > [..] > > > > Maybe I missed something basic here, but I'm afraid can't see what? If > > not, hopefully this information is useful. > > > > I use bcache v13 patches and the MD conversion, and this patch to mdadm. > > For reference: > > [root@fedora-virt ~]# mdadm -E /dev/vd[bc] > /dev/vdb: > Magic : <bcache> > Version : 3 > Role : cache > Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1 > Cache Devs : 1 > Device UUID : a1b25e26:5a4c4db6:2716c1aa:eba44ec7 > Flags : sync > Policy : lru > Label : > Device Size : 16776192 (8.00 GiB 8.59 GB) > Bucket Size : 1024 > Num Buckets : 16384 > this dev : 0 > First Bucket : 1 > Checksum : 45fa97cda53b0b6 correct > /dev/vdc: > Magic : <bcache> > Version : 1 > Role : backing-device > Set UUID : e8670bc3:974b489c:aeb68ea1:d1adf6d1 > Cache Devs : 1 > Device UUID : 7045ff87:aa4b13c4:670d8bb2:0b4ad628 > Flags : sync > State : clean > Mode : writeback > Label : > Device Size : 16776192 (8.00 GiB 8.59 GB) > Bucket Size : 1024 > Num Buckets : 16384 > this dev : 0 > First Bucket : 1 > Checksum : f36910e51451d91c correct Presumably there's a little more to this, as I can't even investigate the UUIDs in use: # make-bcache -C /dev/sdb [...] # make-bcache -B /dev/sdc [...] # mdadm -E /dev/sd[bc] /dev/sdb: MBR Magic : aa55 /dev/sdc: MBR Magic : aa55 I confirmed that I'm using the patched mdadm, but it still doesn't seem to recognise the magic. But, of course, I understand this is all work in progress. So this isn't a complaint at all, I am just looking forward to code in this area and happy to test where I can. Thanks -- Mark ^ permalink raw reply [flat|nested] 6+ messages in thread
[parent not found: <alpine.LNX.2.01.1205152200500.13405-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org>]
* Re: [mdadm PATCH] bcache: add bcache superblock [not found] ` <alpine.LNX.2.01.1205152200500.13405-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org> @ 2012-07-12 15:03 ` Jacek Danecki 0 siblings, 0 replies; 6+ messages in thread From: Jacek Danecki @ 2012-07-12 15:03 UTC (permalink / raw) To: Mark Hills Cc: Dan Williams, neilb-l3A5Bk7waGM, koverstreet-hpIqsD4AKlfQT0dZR+AlfA, linux-raid-u79uwXL29TY76Z2rM5mHXA, linux-bcache-u79uwXL29TY76Z2rM5mHXA On 05/15/12 23:16, Mark Hills wrote: > Presumably there's a little more too this, as I can't even investigate the > UIDs in use: > > # make-bcache -C /dev/sdb > [...] > # make-bcache -B /dev/sdc > [...] > # mdadm -E /dev/sd[bc] > /dev/sdb: > MBR Magic : aa55 > /dev/sdc: > MBR Magic : aa55 > > I confirmed that I'm using the patched mdadm, but it still doesn't seem to > recognise the magic. As far as I can see, the problem could be in mdadm, which recognizes an mbr superblock if a partition table exists on the disk, and then uses load_super_mbr() instead of load_bcache(). You can look at the guess_super_type() function in the util.c file. See the log below. I've attached a small patch, which I used to find out where the issue is. Btw, I've no idea why getinfo_super() for imsm and bcache always sets info.array.ctime to 0. Probably it should be fixed as well, because this value is used by the guess_super_type() function.
rv = ss->load_super(st, fd, NULL); if (rv == 0) { struct mdinfo info; st->ss->getinfo_super(st, &info, NULL); if (bestsuper == -1 || besttime < info.array.ctime) { bestsuper = i; besttime = info.array.ctime; } ss->free_super(st); } ./mdadm -E /dev/sdb check 0.90 0x685a60 load super check 1.x 0x685be0 load super check ddf 0x686060 load super check imsm 0x686240 load super check mbr 0x686400 load super getinfo_super for mbr bestsuper=-1 set bestsuper to 4 check gpt 0x686580 load super check bcache 0x686700 load super load_cache_sb getinfo_super for bcache bestsuper=4 load super again: mbr 0x45a3c1 load_super: 0x686400 mbr 0x45a3c1 /dev/sdb: 0x686400 mbr 0x45a3c1 MBR Magic : aa55 To workaround this issue I've destroyed partition table on /dev/sdb, and now mdadm can examine bcache superblock correctly. ./mdadm -E /dev/sdb check 0.90 0x685a60 load super check 1.x 0x685be0 load super check ddf 0x686060 load super check imsm 0x686240 load super check mbr 0x686400 load super check gpt 0x686580 load super check bcache 0x686700 load super load_cache_sb getinfo_super for bcache bestsuper=-1 set bestsuper to 6 load super again: bcache 0x45bb6b load_cache_sb load_super: 0x686700 bcache 0x45bb6b load_cache_sb /dev/sdb: 0x686700 bcache 0x45bb6b Magic : <bcache> Version : 0 Role : cache Set UUID : 1eefbc0e:e4405d53:e6c630a7:83b523c0 Cache Devs : 1 Device UUID : 4a9e3223:cb403537:b05209b0:26948206 Flags : Policy : lru Label : Device Size : 234439680 (111.79 GiB 120.03 GB) Bucket Size : 1024 Num Buckets : 228946 this dev : 0 First Bucket : 1 Checksum : 6730f38d517c4db2 correct diff --git a/Examine.c b/Examine.c index 5d71e53..5173f4a 100644 --- a/Examine.c +++ b/Examine.c @@ -88,10 +88,12 @@ int Examine(struct mddev_dev *devlist, int brief, int export, int scan, if (st) { err = 1; st->ignore_hw_compat = 1; - if (!container) + if (!container) { + printf("load_super: %p %s %p\n", st->ss, st->ss->name, st->ss->load_super); err = st->ss->load_super(st, fd, (brief||scan) ? 
NULL :devlist->devname); + } if (err && st->ss->load_container) { err = st->ss->load_container(st, fd, (brief||scan) ? NULL @@ -149,7 +151,7 @@ int Examine(struct mddev_dev *devlist, int brief, int export, int scan, st->ss->export_examine_super(st); st->ss->free_super(st); } else { - printf("%s:\n",devlist->devname); + printf("%s: %p %s %p\n",devlist->devname, st->ss, st->ss->name, st->ss->load_super); st->ss->examine_super(st, homehost); st->ss->free_super(st); } diff --git a/super-bcache.c b/super-bcache.c index ec8f3db..ebee248 100644 --- a/super-bcache.c +++ b/super-bcache.c @@ -72,6 +72,7 @@ static int load_cache_sb(struct bcache_super *super, int keep_fd) if (memcmp(c->magic, bcache_magic, sizeof(bcache_magic)) != 0) return ENODEV; + printf("load_cache_sb\n"); return 0; } diff --git a/util.c b/util.c index d9e49cf..5aaa986 100644 --- a/util.c +++ b/util.c @@ -1043,19 +1043,23 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type) for (i=0 ; superlist[i]; i++) { int rv; ss = superlist[i]; + printf("check %s %p\n", ss->name, ss); if (guess_type == guess_array && ss->add_to_super == NULL) continue; if (guess_type == guess_partitions && ss->add_to_super != NULL) continue; memset(st, 0, sizeof(*st)); st->ignore_hw_compat = 1; + printf("load super\n"); rv = ss->load_super(st, fd, NULL); if (rv == 0) { struct mdinfo info; + printf("getinfo_super for %s bestsuper=%d\n", st->ss->name, bestsuper); st->ss->getinfo_super(st, &info, NULL); if (bestsuper == -1 || besttime < info.array.ctime) { bestsuper = i; + printf("set bestsuper to %d\n", i); besttime = info.array.ctime; } ss->free_super(st); @@ -1065,6 +1069,7 @@ struct supertype *guess_super_type(int fd, enum guess_types guess_type) int rv; memset(st, 0, sizeof(*st)); st->ignore_hw_compat = 1; + printf("load super again: %s %p\n", superlist[bestsuper]->name, superlist[bestsuper]->load_super); rv = superlist[bestsuper]->load_super(st, fd, NULL); if (rv == 0) { superlist[bestsuper]->free_super(st); 
-- Jacek ^ permalink raw reply related [flat|nested] 6+ messages in thread
end of thread, other threads:[~2012-07-12 15:03 UTC | newest] Thread overview: 6+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2012-05-11 20:39 [mdadm PATCH] bcache: add bcache superblock Dan Williams [not found] ` <20120511203835.26301.1937.stgit-p8uTFz9XbKgaePuBGzJMJzMJUdESFZ8XQQ4Iyu8u01E@public.gmane.org> 2012-05-12 7:38 ` Jack Wang 2012-05-15 0:04 ` Mark Hills [not found] ` <alpine.LNX.2.01.1205150042200.12473-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org> 2012-05-15 16:59 ` Dan Williams [not found] ` <CABE8wwu_TXJckRXg1eSny8TnciJ6GOM_Vy7FNW1FX84YT6QZzQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org> 2012-05-15 21:16 ` Mark Hills [not found] ` <alpine.LNX.2.01.1205152200500.13405-4jfXtw+jRJ582hYKe6nXyg@public.gmane.org> 2012-07-12 15:03 ` Jacek Danecki
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).