本文内容基于linux4.4.198.
最近用了下jffs2文件系统，把心得记录一下。
本文将从实用性的角度介绍jffs2文件系统，关于理论及介绍性的文章，网络上已经有很多了，可以结合着看，从不同角度一起梳理一下jffs2文件系统的相关内容。

直接上jffs2中重要的数据结构和宏吧。

存放在flash上的数据结构

jffs2在flash上的数据均称为节点，每个节点都以struct jffs2_unknown_node开头，通过这个开头判别这个是个什么类型的节点。

//On-flash data structure: the common header that every JFFS2 node starts with;
//it is used to tell what type of node follows.
struct jffs2_unknown_node //its in-core descriptor is struct jffs2_raw_node_ref
{
	/* All start like this */
	jint16_t magic;
	jint16_t nodetype;
	jint32_t totlen; /* So we can skip over nodes we don't grok */
	jint32_t hdr_crc; //CRC of the three preceding members
};

在flash上，jffs2主要有两种节点，分别是struct jffs2_raw_dirent和struct jffs2_raw_inode

//On-flash data structure: a directory entry node.
//The first four members mirror the common node header (magic, nodetype,
//totlen, hdr_crc).
struct jffs2_raw_dirent
{
	jint16_t magic;
	jint16_t nodetype;	/* == JFFS2_NODETYPE_DIRENT */
	jint32_t totlen;
	jint32_t hdr_crc;
	jint32_t pino;
	jint32_t version;
	jint32_t ino; /* == zero for unlink */
	jint32_t mctime;
	__u8 nsize; //length of the file name
	__u8 type;
	__u8 unused[2];
	jint32_t node_crc;
	jint32_t name_crc;
	__u8 name[0]; //the entry's name starts here
};

/* The JFFS2 raw inode structure: Used for storage on physical media.  */
/* The uid, gid, atime, mtime and ctime members could be longer, but
   are left like this for space efficiency. If and when people decide
   they really need them extended, it's simple enough to add support for
   a new type of raw node.
*/
//On-flash data structure
struct jffs2_raw_inode
{
	jint16_t magic;      /* A constant magic number.  */
	jint16_t nodetype;   /* == JFFS2_NODETYPE_INODE */
	jint32_t totlen;     /* Total length of this node (inc data, etc.) */
	jint32_t hdr_crc;
	jint32_t ino;        /* Inode number.  */
	jint32_t version;    /* Version number.  */
	jmode_t mode;       /* The file's type or mode.  */ //holds permission bits such as 744
	jint16_t uid;        /* The file's owner.  */
	jint16_t gid;        /* The file's group.  */
	jint32_t isize;      /* Total resultant size of this inode (used for truncations)  */
	jint32_t atime;      /* Last access time.  */
	jint32_t mtime;      /* Last modification time.  */
	jint32_t ctime;      /* Change time.  */
	jint32_t offset;     /* Where to begin to write.  */ //offset within the file; the position of this
	//jffs2_raw_inode inside the flash partition is instead given by the flash_offset
	//field of its in-core descriptor, struct jffs2_raw_node_ref
	jint32_t csize;      /* (Compressed) data size */
	jint32_t dsize;	     /* Size of the node's data. (after decompression) */
	__u8 compr;       /* Compression algorithm used */
	__u8 usercompr;   /* Compression algorithm requested by the user */
	jint16_t flags;	     /* See JFFS2_INO_FLAG_* */
	jint32_t data_crc;   /* CRC for the (compressed) data.  */
	jint32_t node_crc;   /* CRC for the raw inode (excluding data)  */
	__u8 data[0];
};

存放在内存中的数据结构

其中，struct jffs2_raw_node_ref和struct jffs2_full_dnode用来描述flash上的一个节点，struct jffs2_inode_cache代表一个文件，struct jffs2_full_dirent代表一个目录项，struct jffs2_node_frag用于组织红黑树。因为jffs2文件的数据可能分散在flash上的各个地方，所以需要用红黑树来将整个文件组织起来，便于索引和查找。

/*
  This is all we need to keep in-core for each raw node during normal
  operation. As and when we do read_inode on a particular inode, we can
  scan the nodes which are listed for it and build up a proper map of
  which nodes are currently valid. JFFSv1 always used to keep that whole
  map in core for each inode.
*/
/*  Judging by the comment above, this structure apparently has to
 *  stay in kernel memory the whole time.
 */
struct jffs2_raw_node_ref //in-core structure representing one node on flash
{
	struct jffs2_raw_node_ref *next_in_ino; /* Points to the next raw_node_ref
		for this object. If this _is_ the last, it points to the inode_cache,
		xattr_ref or xattr_datum instead. The common part of those structures
		has NULL in the first word. See jffs2_raw_ref_to_ic() below */
	//Node start offsets on flash are all 4-byte aligned, so JFFS2 uses the
	//low-order bit of the offset as the "this node is obsolete" flag.
	uint32_t flash_offset; //physical address of the corresponding node within the flash
	//partition; when the node is obsoleted, that is flagged in this field
#undef TEST_TOTLEN
#ifdef TEST_TOTLEN
	uint32_t __totlen; /* This may die; use ref_totlen(c, jeb, ) below */
#endif
};


/*
  Larger representation of a raw node, kept in-core only when the
  struct inode for this particular ino is instantiated.
*/
//I.e. this structure only needs to exist in memory while the file's inode
//is instantiated.
struct jffs2_full_dnode
{
	struct jffs2_raw_node_ref *raw;
	uint32_t ofs; /* The offset to which the data of this node belongs */ //logical offset within the file
	uint32_t size;
	uint32_t frags; /* Counter: number of fragments which currently refer
			to this node. When this reaches zero,
			the node is obsolete.  */
};

/*
  Fragments - used to build a map of which raw node to obtain
  data from for each part of the ino
*/
struct jffs2_node_frag //element of the red-black tree that maps file ranges to nodes
{
	struct rb_node rb;
	struct jffs2_full_dnode *node; /* NULL for holes */
	uint32_t size;
	uint32_t ofs; /* The offset to which this fragment belongs */
};

/* For each inode in the filesystem, we need to keep a record of
   nlink, because it would be a PITA to scan the whole directory tree
   at read_inode() time to calculate it, and to keep sufficient information
   in the raw_node_ref (basically both parent and child inode number for
   dirent nodes) would take more space than this does. We also keep
   a pointer to the first physical node which is part of this inode, too.
*/
struct jffs2_inode_cache { //represents one file: the file's "in-core descriptor"
	/* First part of structure is shared with other objects which
	   can terminate the raw node refs' next_in_ino list -- which
	   are currently struct jffs2_xattr_datum and struct jffs2_xattr_ref. */

	struct jffs2_full_dirent *scan_dents; /* Used during scan to hold
		temporary lists of dirents, and later must be set to
		NULL to mark the end of the raw_node_ref->next_in_ino
		chain. */
	struct jffs2_raw_node_ref *nodes;
	uint8_t class;	/* It's used for identification */

	/* end of shared structure */

	uint8_t flags;
	uint16_t state;
	uint32_t ino; //inode number
	struct jffs2_inode_cache *next; //used for the hash table
#ifdef CONFIG_JFFS2_FS_XATTR
	struct jffs2_xattr_ref *xref;
#endif
	uint32_t pino_nlink;	/* Directories store parent inode
				   here; other inodes store nlink
				   (the number of hard links).
				   Zero always means that it's
				   completely unlinked. */
};

struct jffs2_full_dirent //represents one directory entry; freed once mounting has finished, but used again later
{
	union {
		struct jffs2_raw_node_ref *raw;
		struct jffs2_inode_cache *ic; /* Just during part of build */
	};
	struct jffs2_full_dirent *next;
	uint32_t version;
	uint32_t ino; /* == zero for unlink */
	unsigned int nhash;
	unsigned char type;
	unsigned char name[0];
};

对于整个文件系统的数据，用结构体struct jffs2_sb_info保存，每个jffs2文件系统只有一个struct jffs2_sb_info结构体实例。
这个数据结构实例被super_block结构体的s_fs_info成员指向，包含了该文件系统实例的各种块链表头，初始化时，会为成员inocache_list分配内存空间用于存储jffs2_inode_cache，也会为成员blocks分配内存空间存储各擦除块的信息，这个结构体包含了几乎整个文件系统的所有信息。

/* A struct for the overall file system control.  Pointers to
   jffs2_sb_info structs are named `c' in the source code.
   Nee jffs_control
*/
struct jffs2_sb_info {
	struct mtd_info *mtd;

	uint32_t highest_ino; //highest inode number
	uint32_t checked_ino;

	unsigned int flags; //flags specified when the filesystem was mounted

	struct task_struct *gc_task;	/* GC task struct */
	struct completion gc_thread_start; /* GC thread start completion */
	struct completion gc_thread_exit; /* GC thread exit completion port */

	struct mutex alloc_sem;		/* Used to protect all the following
					   fields, and also to protect against
					   out-of-order writing of nodes. And GC. */
	uint32_t cleanmarker_size;	/* Size of an _inline_ CLEANMARKER
					 (i.e. zero for OOB CLEANMARKER) */

	uint32_t flash_size; //taken from struct mtd_info
	uint32_t used_size; //sum of the per-eraseblock fields: space occupied by valid nodes
	uint32_t dirty_size; //sum of the per-eraseblock fields: space occupied by obsolete nodes
	uint32_t wasted_size; //sum of the per-eraseblock fields: space that cannot be used
	uint32_t free_size; //sum of the per-eraseblock fields: remaining free space
	uint32_t erasing_size;
	uint32_t bad_size;
	uint32_t sector_size; //taken from struct mtd_info
	uint32_t unchecked_size; //used by GC: size of the nodes that have not been checked yet

	uint32_t nr_free_blocks;
	uint32_t nr_erasing_blocks;

	/* Number of free blocks there must be before we... */
	uint8_t resv_blocks_write;	/* ... allow a normal filesystem write */
	uint8_t resv_blocks_deletion;	/* ... allow a normal filesystem deletion */
	uint8_t resv_blocks_gctrigger;	/* ... wake up the GC thread */
	uint8_t resv_blocks_gcbad;	/* ... pick a block from the bad_list to GC */
	uint8_t resv_blocks_gcmerge;	/* ... merge pages when garbage collecting */
	/* Number of 'very dirty' blocks before we trigger immediate GC */
	uint8_t vdirty_blocks_gctrigger;

	uint32_t nospc_dirty_size;

	uint32_t nr_blocks;
	struct jffs2_eraseblock *blocks;	/* The whole array of blocks. Used for getting blocks
						 * from the offset (blocks[ofs / sector_size]) */
	struct jffs2_eraseblock *nextblock;	/* The block we're currently filling */ //the erase block that writes currently go to

	struct jffs2_eraseblock *gcblock;	/* The block we're currently garbage-collecting */

	struct list_head clean_list;		/* Blocks 100% full of clean data */ //contain only valid nodes
	struct list_head very_dirty_list;	/* Blocks with lots of dirty space */ //most of their nodes are obsolete
	struct list_head dirty_list;		/* Blocks with some dirty space */ //contain at least one obsolete node
	struct list_head erasable_list;		/* Blocks which are completely dirty, and need erasing */
	struct list_head erasable_pending_wbuf_list;	/* Blocks which need erasing but only after the current wbuf is flushed */
	struct list_head erasing_list;		/* Blocks which are currently erasing */
	struct list_head erase_checking_list;	/* Blocks which are being checked and marked */
	struct list_head erase_pending_list;	/* Blocks which need erasing now */ //currently waiting to be erased
	struct list_head erase_complete_list;	/* Blocks which are erased and need the clean marker written to them */
	struct list_head free_list;		/* Blocks which are free and ready to be used */ //erase finished and CLEANMARKER already written
	struct list_head bad_list;		/* Bad blocks. */ //erase blocks that contain damaged cells
	struct list_head bad_used_list;		/* Bad blocks with valid data in. */ //damaged but still holding data; blocks on this list cannot be erased and reused even after their data has been migrated

	spinlock_t erase_completion_lock;	/* Protect free_list and erasing_list
						   against erase completion handler */
	wait_queue_head_t erase_wait;		/* For waiting for erases to complete */

	wait_queue_head_t inocache_wq;
	int inocache_hashsize;
	struct jffs2_inode_cache **inocache_list; //heads of the inode-cache hash table
	spinlock_t inocache_lock;

	/* Sem to allow jffs2_garbage_collect_deletion_dirent to
	   drop the erase_completion_lock while it's holding a pointer
	   to an obsoleted node. I don't like this. Alternatives welcomed. */
	struct mutex erase_free_sem;

	uint32_t wbuf_pagesize; /* 0 for NOR and other flashes with no wbuf */

#ifdef CONFIG_JFFS2_FS_WBUF_VERIFY
	unsigned char *wbuf_verify; /* read-back buffer for verification */
#endif
#ifdef CONFIG_JFFS2_FS_WRITEBUFFER
	unsigned char *wbuf; /* Write-behind buffer for NAND flash */
	//Write-behind: when the application updates data held in the cache, the
	//cache pushes the update to the underlying store after a set delay.
	uint32_t wbuf_ofs;
	uint32_t wbuf_len;
	struct jffs2_inodirty *wbuf_inodes;
	struct rw_semaphore wbuf_sem;	/* Protects the write buffer */

	struct delayed_work wbuf_dwork; /* write-buffer write-out work */

	unsigned char *oobbuf;
	int oobavail; /* How many bytes are available for JFFS2 in OOB */
#endif

	struct jffs2_summary *summary;		/* Summary information */
	struct jffs2_mount_opts mount_opts;

#ifdef CONFIG_JFFS2_FS_XATTR
#define XATTRINDEX_HASHSIZE	(57)
	uint32_t highest_xid;
	uint32_t highest_xseqno;
	struct list_head xattrindex[XATTRINDEX_HASHSIZE];
	struct list_head xattr_unchecked;
	struct list_head xattr_dead_list;
	struct jffs2_xattr_ref *xref_dead_list;
	struct jffs2_xattr_ref *xref_temp;
	struct rw_semaphore xattr_sem;
	uint32_t xdatum_mem_usage;
	uint32_t xdatum_mem_threshold;
#endif
	/* OS-private pointer for getting back to master superblock info */
	void *os_priv;
};

对于文件的信息，由结构体struct jffs2_inode_info保存，这个结构体实例被结构体inode的i_private成员指向

/* Per-file information kept alongside the VFS inode (see vfs_inode below). */
struct jffs2_inode_info {
	/* We need an internal mutex similar to inode->i_mutex.
	   Unfortunately, we can't use the existing one, because
	   either the GC would deadlock, or we'd have to release it
	   before letting GC proceed. Or we'd have to put ugliness
	   into the GC code so it didn't attempt to obtain the i_mutex
	   for the inode(s) which are already locked */
	struct mutex sem; //synchronises the low-level read/write paths with GC, because GC also performs writes

	/* The highest (datanode) version number used for this ino */
	uint32_t highest_version;

	/* List of data fragments which make up the file */
	struct rb_root fragtree;

	/* There may be one datanode which isn't referenced by any of the
	   above fragments, if it contains a metadata update but no actual
	   data - or if this is a directory inode */
	/* This also holds the _only_ dnode for symlinks/device nodes,
	   etc. */
	struct jffs2_full_dnode *metadata; //symlinks and device files

	/* Directory entries */
	struct jffs2_full_dirent *dents; //when the file is opened, a jffs2_full_dirent is created for each node's in-core descriptor (jffs2_raw_node_ref) and linked into the list headed by dents

	/* The target path if this is the inode of a symlink */
	unsigned char *target;

	/* Some stuff we just have to keep in-core at all times, for each inode. */
	struct jffs2_inode_cache *inocache; //points to this file's in-core descriptor, struct jffs2_inode_cache

	uint16_t flags;
	uint8_t usercompr;
	struct inode vfs_inode;
};

另外，对于擦除块的信息，由结构体struct jffs2_eraseblock描述，一个jffs2_eraseblock描述一个擦除块，结构体包含了该擦除块的相关信息

struct jffs2_eraseblock //erase block descriptor: one instance describes one erase block
{
	struct list_head list;
	int bad_count;
	uint32_t offset;		/* of this block in the MTD */

	uint32_t unchecked_size;
	uint32_t used_size;
	uint32_t dirty_size;
	uint32_t wasted_size;
	uint32_t free_size;	/* Note that sector_size - free_size
				   is the address of the first free space */ //important: write_ofs() relies on this
	uint32_t allocated_refs;
	struct jffs2_raw_node_ref *first_node;
	struct jffs2_raw_node_ref *last_node;

	struct jffs2_raw_node_ref *gc_node;	/* Next node to be garbage collected */ //points to the descriptor of the node that is to be garbage-collected next
};

另外，jffs2有个重要的宏write_ofs，用于获得当前写入的位置

/*
 * write_ofs(c) - offset at which the next write into the current block lands.
 *
 * c is a pointer to the filesystem control structure (struct jffs2_sb_info).
 * The result is the offset of the block currently being filled
 * (c->nextblock->offset) plus the space already consumed in that block
 * (c->sector_size - c->nextblock->free_size).
 *
 * Note: the argument is expanded more than once, so it must not have
 * side effects.
 */
#define write_ofs(c)							\
	((c)->nextblock->offset +					\
	 ((c)->sector_size - (c)->nextblock->free_size))

这里解释一下上面这个宏函数：在jffs2代码中，c一般表示jffs2_sb_info，当前写入的块为jffs2_sb_info->nextblock，所以写入偏移即为：块偏移 + 块已写空间大小 = 块偏移 + （块大小 - 剩余空间大小）。写操作都是从块头开始写，一直写到块尾；如果是nor flash，则sector_size就是擦除块大小。

flash上的summary结构体

每个擦除块有且只有一个结构体struct jffs2_sum_marker实例，这个结构体保留在每个擦除块的最后8bytes，其offset指向本擦除块中存放jffs2_raw_summary的位置，jffs2_raw_summary之后便是存放struct jffs2_sum_inode_flash和struct jffs2_sum_dirent_flash的地方了，平时jffs2不会去写这些数据，但会计算，如果写完某个节点的数据，再加上summary的信息，这个擦除块能写满的话，那就将summary信息写下，然后做好jffs2_sum_marker，由于本擦除块被写满，选择下一个擦除块去写。
在文件系统挂载的时候，jffs2_scan_eraseblock()会去读取每个擦除块的最后8bytes。如果验证是有效的sum_marker节点，就会根据jffs2_sum_marker中的offset偏移去读取summary node。所有在挂载中需要的信息都存放在summary node节点中，因此就没必要扫描整个擦除块。

/* Summary marker is stored at the end of every summarized erase block */
struct jffs2_sum_marker //records where the summary node, i.e. struct jffs2_raw_summary, is stored
{
	jint32_t offset;	/* offset of the summary node in the jeb */ //locates the jffs2_raw_summary
	jint32_t magic; 	/* == JFFS2_SUM_MAGIC */
};

struct jffs2_sum_marker的offset成员指向的位置存放着结构体struct jffs2_raw_summary，每个擦除块有且仅有一个实例

/* On-flash summary node; its position is given by jffs2_sum_marker.offset. */
struct jffs2_raw_summary
{
	jint16_t magic;
	jint16_t nodetype; 	/* = JFFS2_NODETYPE_SUMMARY */
	jint32_t totlen;
	jint32_t hdr_crc;
	jint32_t sum_num;	/* number of sum entries*/ //equals the number of nodes (jffs2_raw_dirent, jffs2_raw_inode) in this erase block
	jint32_t cln_mkr;	/* clean marker size, 0 = no cleanmarker */
	jint32_t padded;	/* sum of the size of padding nodes */
	jint32_t sum_crc;	/* summary information crc */
	jint32_t node_crc; 	/* node crc */
	jint32_t sum[0]; 	/* inode summary info */ //the struct jffs2_sum_inode_flash and struct jffs2_sum_dirent_flash records follow from here; the two are told apart by their first member, nodetype
};

在结构体struct jffs2_raw_summary后面，接连存放多个struct jffs2_sum_inode_flash和struct jffs2_sum_dirent_flash实例，它们通过第一个成员nodetype区分。

struct jffs2_sum_inode_flash //summary record holding the information about a data node (dnode) that is needed during scan
{
	jint16_t nodetype;	/* node type */
	jint32_t inode;		/* inode number */
	jint32_t version;	/* inode version */
	jint32_t offset;	/* offset on jeb */ //offset of the described node within the erase block
	jint32_t totlen; 	/* record length */ //total length of the described node
} __attribute__((packed));

//Told apart from struct jffs2_sum_inode_flash by the first member, nodetype.
//On-flash structure.
struct jffs2_sum_dirent_flash //summary record holding the information about a dirent node that is needed during scan
{
	jint16_t nodetype;	/* == JFFS2_NODETYPE_DIRENT */
	jint32_t totlen;	/* record length */
	jint32_t offset;	/* offset on jeb */
	jint32_t pino;		/* parent inode */
	jint32_t version;	/* dirent version */
	jint32_t ino; 		/* == zero for unlink */
	uint8_t nsize;		/* dirent name size */
	uint8_t type;		/* dirent type */
	uint8_t name[0];	/* dirent name */
} __attribute__((packed));