/*
* Copyright (c) International Business Machines Corp., 2000-2002
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
* the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <config.h>
#include <time.h>
#include <stdio.h>
#include <stdlib.h>
#include <memory.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
#include "jfs_types.h"
#include "jfs_endian.h"
#include "jfs_filsys.h"
#include "jfs_superblock.h"
#include "jfs_dinode.h"
#include "jfs_dtree.h"
#include "jfs_xtree.h"
#include "jfs_logmgr.h"
#include "jfs_dmap.h"
#include "jfs_imap.h"
#include "logredo.h"
#include "logform.h"
#include "devices.h"
#include "debug.h"
#include "utilsubs.h"
#include "fsck_message.h" /* for chkdsk message logging facility */
/* + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
*
* L O C A L M A C R O D E F I N I T I O N S
*
*/
/*
 * MAKEDEV(x, y): combine two numbers into a dev_t by placing x above
 * bit 16 and OR-ing y into the low bits (presumably major/minor device
 * numbers -- TODO confirm against callers).
 */
#define MAKEDEV(__x,__y) (dev_t)(((__x)<<16) | (__y))
/*
 * LOGPNTOB(x): convert a log page number into a byte offset by shifting
 * it left by L2LOGPSIZE.
 */
#define LOGPNTOB(x) ((x)<<L2LOGPSIZE)
/*
 * LOG2NUM(NUM, L2NUM): store floor(log2(NUM)) into L2NUM.
 *
 *   NUM <= 0  ->  L2NUM = -1
 *   NUM == 1  ->  L2NUM = 0
 *   otherwise ->  L2NUM = number of right shifts needed to reach 1
 *
 * NOTE: NUM must be a modifiable lvalue and IS CLOBBERED (it is shifted
 * down to 1 when it starts out greater than 1). Pass a scratch copy.
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement and is safe inside an unbraced if/else.
 */
#define LOG2NUM(NUM, L2NUM)\
do {\
	if ((NUM) <= 0)\
		(L2NUM) = -1;\
	else {\
		(L2NUM) = 0;\
		while ((NUM) > 1) {\
			(L2NUM)++;\
			(NUM) >>= 1;\
		}\
	}\
} while (0)
/* + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
*
* R E M E M B E R M E M O R Y A L L O C F A I L U R E
*
*/
/*
 * When non-zero, jfs_logredo() converts an otherwise successful return
 * (rc == 0) into ENOMEM25.
 */
int32_t Insuff_memory_for_maps = 0;
/*
 * The four variables below are not referenced in the visible part of this
 * file; presumably they track a pre-allocated storage pool and the storage
 * used for the block map -- TODO confirm against alloc_storage().
 */
char *available_stg_addr = NULL;
int32_t available_stg_bytes = 0;
char *bmap_stg_addr = NULL;
int32_t bmap_stg_bytes = 0;
/* + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
*
* S T U F F F O R T H E L O G
*
*/
/*
 * logsup is read, validated and finally rewritten by jfs_logredo().
 * numdoblk and numnodofile are reported through fsck_send_msg() at the end
 * of jfs_logredo(); a non-zero numExtDtPg makes jfs_logredo() call
 * doExtDtPg() after the replay loop.
 */
struct logsuper logsup; /* log super block */
int32_t numdoblk; /* number of do blocks used */
int32_t numnodofile; /* number of nodo file blocks used */
int32_t numExtDtPg = 0; /* number of extended dtpage blocks used */
/*
* open file system aggregate/lv array
*
* logredo() processes a single log.
*
* In the first release, logredo will process a single log which relates
* to the single fileset in a single aggregate. In some future release,
* a single log may be used for multiple filesets which may or may not all
* reside in the same aggregate.
*
*/
/* one entry per aggregate/lv that may be attached to the log */
struct vopen vopen[MAX_ACTIVE];
/* description of the log being replayed (location, fp, xaddr, size, serial) */
struct log_info Log;
/*
 * The volume handed to findLog(): its uuid and already-open stream are
 * saved so openVol() can reuse the stream instead of reopening by label.
 */
struct {
uuid_t uuid;
FILE *fp;
} primary_vol;
extern int LogOpenMode; /* logdump sets this to O_RDONLY */
/*
 * if this flag is set then the primary superblock is
 * corrupt. The secondary superblock is good, but chkdsk
 * wasn't able to fix the primary version. logredo can
 * run, but must use the secondary version of the
 * aggregate superblock
 *
 * Set from the use_2nd_aggSuper argument of jfs_logredo() and consulted
 * by rdwrSuper() to choose between SUPER1_OFF and SUPER2_OFF.
 */
int32_t use_2ndary_agg_superblock;
/*
* file system page buffer cache
*
* for k > 0, bufhdr[k] describes contents of buffer[k-1].
* bufhdr[0] is reserved as anchor for free/lru list:
* bufhdr[0].next points to the MRU buffer (head),
* bufhdr[0].prev points to the LRU buffer (tail);
*/
/* buffer header table */
/*
 * One header per cache slot. bflush() uses modify, vol and pxd to decide
 * whether and where to write the matching buffer[] entry.
 */
struct bufhdr {
int16_t next; /* 2: next on free/lru list */
int16_t prev; /* 2: previous on free/lru list */
int16_t hnext; /* 2: next on hash chain */
int16_t hprev; /* 2: previous on hash chain */
char modify; /* 1: buffer was modified */
char inuse; /* 1: buffer on hash chain */
int16_t reserve; /* 2 */
int32_t vol; /* 4: minor of aggregate/lv number; used as index into vopen[] */
pxd_t pxd; /* 8: on-disk page pxd */
} bufhdr[NBUFPOOL]; /* (24) */
/* buffer table: buffer[k-1] holds the page described by bufhdr[k] */
struct bufpool {
char bytes[PSIZE];
} buffer[NBUFPOOL - 1];
/*
 * log page buffer cache
 *
 * log has its own 4 page buffer pool.
 */
/* passed to logRead() by jfs_logredo() as the destination for record data */
uint8_t afterdata[LOGPSIZE * 2]; /* buffer to read in redopage data */
/*
* Miscellaneous
*/
extern caddr_t prog; /* Program name */
extern int32_t mntcnt;
extern int32_t bufsize; /* set to sizeof (int) at the start of jfs_logredo() */
extern char *mntinfo; /* pointed at bufsize at the start of jfs_logredo() */
extern int32_t retcode; /* return code from logredo */
/* when non-zero after a record is processed, jfs_logredo() flushes all buffers and resets it */
int end_of_transaction = 0;
/*
 * external references
 */
extern char *optarg;
extern int optind;
extern int initMaps(int32_t); /* called by openVol() for a volume that was not cleanly unmounted */
extern int updateMaps(int); /* called by jfs_logredo() for every volume in state VOPEN_OPEN */
extern int findEndOfLog(void); /* a negative result is treated as an error by jfs_logredo() */
extern int logRead(int32_t, struct lrd *, char *); /* its result is used as the address of the next record to read */
extern int logredoInit(void);
/* per-record-type handlers dispatched from the replay loop in jfs_logredo() */
extern int doCommit(struct lrd *);
extern int doExtDtPg(void);
extern int doNoRedoFile(struct lrd *, uint32_t);
extern int doNoRedoPage(struct lrd *);
extern int doNoRedoInoExt(struct lrd *);
extern int doAfter(struct lrd *, int32_t);
extern int doUpdateMap(struct lrd *);
extern int alloc_wrksp(uint32_t, int, int, void **);
extern FILE * open_by_label(uuid_t, int, int, char *, int *); /* NULL result means the volume could not be opened (see openVol()) */
extern char log_device[]; /* external journal device name; an empty string means none was given (see findLog()) */
/*
* forward references
*/
int doMount(struct lrd *); /* handle a LOG_MOUNT record */
int openVol(int32_t); /* open and validate one aggregate/lv */
int updateSuper(int vol); /* rewrite the superblock state of a volume */
int rdwrSuper(FILE *, struct superblock *, int32_t); /* read (PB_READ) or write the aggregate superblock */
int bflush(int32_t, struct bufpool *); /* write one modified cache buffer back to disk */
int logOpen(void);
int fsError(int, int, int64_t);
int logError(int, int);
static int recoverExtendFS(FILE *);
int alloc_storage(int32_t, void **, int32_t *);
int alloc_dmap_bitrec(struct dmap_bitmaps **);
/*
 * debug control
 *
 * dflag is 1 when built with _JFS_DEBUG and 0 otherwise; the debug build
 * additionally declares timing variables and dump helpers.
 */
#ifdef _JFS_DEBUG
int32_t dflag = 1;
time_t *Tp;
uint32_t tp_start, tp_end;
int xdump(char *, int);
int x_scmp(char *, char *);
void x_scpy(char *, char *);
int prtdesc(struct lrd *);
#else
int32_t dflag = 0;
#endif
/*
* NAME: jfs_logredo()
*
* FUNCTION: Replay all transactions committed since the most
* recent synch point.
*
* NOTES:
* >>>>>> The log replay is accomplished in one pass over the
* log, reading backwards from logend to the first synch
* point record encountered. This means that the log
* entries are read and processed in LIFO (Last-In-First-Out)
* order. In other words, the records logged latest in
* time are the first records processed during log replay.
*
* >>>>>> Inodes, index trees, and directory trees
*
* Inodes, index tree structures, and directory tree
* structures are handled by processing committed redopage
* records which have not been superseded by noredo records.
* This processing copies data from the log record into the
* appropriate disk extent page(s).
*
* To ensure that only the last (in time) updates to any
* given disk page are applied during log replay, logredo
* maintains a record (union structure summary1/summary2),
* for each disk page which it has processed, of which
* portions have been updated by log records encountered.
*
* >>>>>> Inode Allocation Map processing
* The xtree for the Inode Allocation Map is journaled, and
* a careful write is used to update it during commit
* processing.
* The imap index tree is also duplicated at the known location. (TBD)
* So at logredo time, the xtree for imap is always readable and correct.
* This is the basic requirement from logredo.
*
* the inode map control page (struct dinomap) is only flushed to disk at
* the umount time. For iag, pmap will go to disk at commit time.
* iagnum will not change in run-time.
* agstart field will be stable without the extendfs utility. It is TBD
* how to handle agstart when the extendfs utility is available.
* Other fields ( wmap. inosmap, extsmap ino free list pointers,
* ino ext free list pointers ) are at working status ( i.e they are
* updated in run-time. So the following
* meta-data of the imap need to be reconstructed at the logredo time:
* 1) IAGs, the pmap of imap and inoext array are contained in IAGs.
* 2) AG Free inode list
* 3) AG Free Inode Extent list
* 4) IAG Free list
*
* There are two imaps need to take care of :
* 1) aggregate imap
* 2) fileset imap
* For the first release, the aggregate imap is stable and we only
* need to deal with the fileset imap.
*
* Block Allocation Map (bmap file) is for an aggregate/lv. There are
* three fields related to the size of bmap file.
* 1) superblock.s_size: This field indicates aggregate size. It
* tells number of sector-size blocks for this
* aggregate. The size of aggregate determines
* the size of its bmap file.
* Since the aggregate's superblock is updated
* using sync-write, superblock.s_size is trustable
* at logredo time.
* note1: mkfs reserves the fsck space. So s_size really
* indicates (size_of_aggregate - fsck_reserve_space)
* note2: At the mkfs time, "-s" parameter could be used
* to indicate how large the aggregate/filesystem is.
* One lv contains at most one aggregate/filesystem.
* If "-s" gives the value is smaller than the size
* of lv, it is ok. The space is just wasted.
*
* Without "-s" parameter, mkfs will use the whole
* size of lv to make an aggregate/filesystem.
* That is usually the case. So we can also say
* an aggregate/lv. "-s" is often used for test.
*
* 2) dbmap.dn_mapsize: This field also indicates aggregate/lv size.
* It tells number of aggre. blocks in the
* aggregate/lv. Without extendfs, this field should
* be equivalent to superblock.s_size.
* With extendfs, this field may not be updated
* before a system crash happens. So logredo
* need to update it.
* 3) dinode.di_size: For an inode of bmap file, this field indicates
* the logical size of the file. I.e. it contains
* the offset value of the last byte written
* in the file plus one.
* So di_size will include the bmap control page,
* the dmap control pages and dmap pages.
* In the JFS, if a file is a sparse file, the logical
* size is different from its physical size.
* The bmap file is a sparse file if the total of
* dmap pages is ( < 1024) or ( < 1024 * 1024).
* In that case, physically L1.0, and/or L2 does
* not exist, but di_size will include their page
* size.
*
* Note: The di_size does NOT contain the logical
* structure of the file, i.e. the space allocated
* for the xtree stuff is not indicated in di_size.
* It is indicated in di_nblocks.
*
* In addition, the mkfs always put one more dmap
* page into the bmap file for preparing extendfs.
* This hidden dmap page cannot be figured out from
* superblock.s_size, but di_size includes it. Any
* dmapctl pages caused by this hidden dmap page
* are also included in di_size.
*
* The bmap control page, dmap control pages and dmap pages are all
* needed to rebuild at logredo time.
*
* In overall, the following actions are taken at logredo time:
* 1) apply log rec data to the specified page.
* 2) initialize freelist for dtree page or root.
* 3) rebuilt imap
* 4) rebuilt bmap
* in addition, in order to ensure the log record only applying to a
* certain portion of page one time, logredo will start NoRedoFile,
* NoRedoExtent/NoRedoPage filter in the process for accuracy and
* efficiency.
*
* The four log rec types: REDOPAGE, NOREDOPAGE, NOREDOINOEXT, and
* UPDATEMAP, are the main force to initiate these actions. See
* comments on doAfter(), updatePage(), doNoRedoPage(), doNoRedoInoExt,
* and doUpdateMap() for detailed information.
*
* If the aggregate/lv has state of FM_DIRTY, then fsck will run
* after the logredo process since logredo could not get 100%
* recovery. Currently bmap rebuild is slow ( 1 min per 32 GB),
* so logredo will NOT rebuild imap and bmap if fsck will do it
* anyway. But logredo still read maps in and mark them for starting
* NoRedoExtent/NoRedoPage filter.
*
* The maps are rebuilt in the following way:
* at the init phase, storage is allocated for the whole map file for
* both imap and bmap. Reading in the map files from the disk.
* The wmap is inited to zero. At the logredo time, the wmap is used
* to track the bits in pmap. In the beginning of the logredo process
* the allocation status of every block is in doubt. As log records
* are processed, the allocation state is determined and the bit of pmap
* is updated. This fact is recorded in the corresponding bits in wmap.
* So a pmap bit is only updated once at logredo time and only updated
* by the latest in time log record.
* At the end of logredo, the control information, the freelist, etc.
* are built from the value of pmap; then pmap is copied to wmap and
* the whole map is written back to disk.
*
* the status field s_state in the superblock of each file-system is
* set to FM_CLEAN provided the initial status was either FM_CLEAN
* or FM_MOUNT and logredo processing was successful. If an error
* is detected in logredo the status is set to FM_LOGREDO. the status
* is not changed if its initial value was FM_MDIRTY. fsck should be
* run to clean-up the probable damage if the status after logredo
* is either FM_LOGREDO or FM_MDIRTY.
*
* The log record has the format:
* <LogRecordData><LogRecLRD>
* At logredo time, the log is read backward. So for every log rec,
* we read LogRecLRD, which tells how long the LogRecordData is.
* see comments on updatePage() for detailed info of log record format.
*
*.....................................................................
* The logredo handles the log-within-file-system (aka inline log) issue:
*.....................................................................
* For AIX, we always deal with the outline log, i.e. the log resides
* in a separate logical volume. A log is associated with one volume
* group and can be shared by many file systems with this volume group.
* In AIX, the logredo received a device name. It then determines if
* this device is a log name or a filesystem name. If it is a filesystem
* name, get the log minor number for this filesystem. If it is a log name,
* get its minor number.
*
* XJFS decided to put log inside the file system
*
* For supporting the inline log, the above AIX logic should be changed.
*
* Here is the outline:
*
* When the logredo received a device name, it first read the SIZE_OF_SUPER
* bytes from SUPER1_OFF offset to see if it is a file system superblock.
* If yes, check the s_flag to see if it has a inline log or outline log.
* for an inline log the s_logdev should match the input device name's
* major and minor number. If not, an error is returned and logredo exit.
* If no error, the logredo read the log superblock according the log info
* in the fs superblock.
* If the device name does not represent a filesystem device, then logredo
* read the LOGPSIZE bytes from the log page 1 location. If it indicates
* a log device, then open the filesystems according to the log superblock's
* active list. For each filesystem in the active list, read its superblock
* if one of the superblock indicates that it uses an inline log, return
* an error. It is a system code bug if some filesystems use inline log
* and some use outline log.
* If the superblock indicates it used an outline log, check the superblock's
* s_logdev to match the input device name's major and minor numbers.
* If one of them does not match, return error. -- It is a system code bug,
* if some match and some not match; -- It should either match all or none of
* them match. The AIX logredo never check s_logdev with the input log device.
* We should check here.
*
* for outline log, logredo will be called once to cover all the file
* systems in the log superblock's active list.
* For inline log, logredo will be called many times. Each time is for
* one file system. The log superblock's active list has nothing. The
* logmajor and logminor contains file system's major and minor number.
*
*.....................................................................
* logredo handles support EA:
*.....................................................................
* There is 16-byte EA descriptor which is located in the section I of
* dinode.
* The EA can be inline or outline. If it is inlineEA then the data will
* occupy the section IV of the dinode. The dxd_t.flag will indicate so.
* If it is outlineEA, dxd_t.flag will indicate so and the single extent
* is described by EA descriptor.
*
* The section IV of dinode has 128 byte. It is shared by the xtroot and
* inlineEA. The sharing is in FCFS style. If xtree gets the section IV,
* xtree will never give it away even if xtree is shrink or split.
* If inlineEA gets it, there is a chance that later inlineEA is freed and
* so xtree still can get it.
*
* for outlineEA, the XJFS will syncly write the data portion out so there
* is no log rec for the data, but there is still an INODE log rec for EA
* descriptor changes and there is a UPDATEMAP log rec for the allocated
* pxd. If an outlineEA is freed, there are also two log records for it:
* one is INODE with EA descriptor zeroed out, another is the UPDATEMAP
* log rec for the freed pxd.
* For inlineEA, it has to be recorded in the log rec. It is not in a
* separate log rec. Just one additional segment is added into the
* INODE log rec. So an INODE log rec can have at most three segments:
* when the parent and child inodes are in the same page, then there are
* one segment for parent base inode; one segment for child base inode;
* and maybe the third one for the child inlineEA data.
*....................................................................
* 32-bit vs 64-bit
* At the first release. assume that a file system will not be larger
* than 32-bit.
*....................................................................
* TBD:
* the method for handling crashes in the middle of extending a file
* system is as follows. the size of a filesystem is established from
* the superblock.s_size field (i.e the sizes in the diskmap
* and inodemaps are ignored). in extendfs (jfs_cntl.c) the superblock
* is not updated before the maps have been extended and the new inodes
* formatted to zeros. no allocations in the new part of the filesystem
* occur prior to the change in map sizes. if a crash occurs just
* before updating the superblock, the map sizes will be their old
* values. in this case the maps as files may be bigger than necessary.
* if the crash occurs just after writing the super block, the map sizes
* are fixed up here.
*/
int jfs_logredo(caddr_t pathname, FILE *fp, int32_t use_2nd_aggSuper)
{
	/*
	 * Outline:
	 *   1. locate the log (findLog) and validate the log superblock
	 *   2. find the end of the log and initialize the runtime structures
	 *   3. read records backwards from the log end up to the address given
	 *      by the first SYNCPT record met, dispatching on the record type
	 *   4. rebuild extended dtpage freelists, flush the buffer cache
	 *   5. update maps and superblock of every open volume
	 *   6. mark the log superblock LOGREDONE and write it back
	 *
	 * Returns 0 on success. Failures that reach error_out are returned as
	 * negative values (positive codes are negated there); a few early
	 * exits return their code unchanged. ENOMEM25 is returned when the
	 * replay itself succeeded but Insuff_memory_for_maps is set.
	 */
	int rc;
	int k, logaddr, nextaddr, lastaddr, nlogrecords;
	int syncrecord = 0;
	struct lrd ld;
	int lowest_lr_byte = 2 * LOGPSIZE + LOGPHDRSIZE;
	int highest_lr_byte = 0;
	int log_has_wrapped = 0;
	int logend;
	int in_use;

	/*
	 * store away the indicator of which aggregate superblock
	 * to use
	 */
	use_2ndary_agg_superblock = use_2nd_aggSuper;

	/*
	 * loop until we get enough memory to read vmount struct
	 */
	mntinfo = (char *) &bufsize;
	bufsize = sizeof (int);

	/*
	 * validate that the log is not currently in use;
	 */
	rc = findLog(fp, &in_use);
	if (rc < 0) {
		fsck_send_msg(lrdo_DEVOPNREADERROR);
		return (rc);
	}

	/* recover from extendfs() ? */
	if (Log.location & INLINELOG && (vopen[0].status & FM_EXTENDFS)) {
		fsck_send_msg(lrdo_REXTNDBEGIN);
		rc = recoverExtendFS(fp);
		fsck_send_msg(lrdo_REXTNDDONE);
		return rc;
	}

	/*
	 * validate log superblock
	 *
	 * aggregate block size is for log file as well.
	 */
	rc = ujfs_rw_diskblocks(Log.fp,
				(uint64_t) (Log.xaddr +
					    LOGPNTOB(LOGSUPER_B)),
				(unsigned) sizeof (struct logsuper), (char *) &logsup, GET);
	if (rc != 0) {
		fsck_send_msg(lrdo_CANTREADLOGSUP);
		rc = LOGSUPER_READ_ERROR;
		goto error_out;
	}
	ujfs_swap_logsuper(&logsup);

	if (logsup.magic != LOGMAGIC) {
		fsck_send_msg(lrdo_LOGSUPBADMGC);
		rc = NOT_LOG_FILE_ERROR;
		goto error_out;
	}

	if (logsup.version > LOGVERSION) {
		fsck_send_msg(lrdo_LOGSUPBADVER);
		rc = JFS_VERSION_ERROR;
		goto error_out;
	}

	if (Log.location & OUTLINELOG) {
		struct stat st;

		if ((rc = fstat(fileno(Log.fp), &st)))
			goto error_out;

		Log.devnum = st.st_rdev;

		if (in_use) {
			fsck_send_msg(lrdo_LOGINUSE);
			return LOG_IN_USE;
		}
	}

	if (logsup.state == LOGREDONE) {
		/* nothing to replay; for an inline log just refresh the fs superblock */
		fsck_send_msg(lrdo_ALREADYREDONE);
		if (Log.location & INLINELOG)
			if ((rc = updateSuper(0)) != 0) {
				fsck_send_msg(lrdo_CANTUPDLOGSUP);
				return (rc);
			}
		return (0);
	}

	Log.size = logsup.size;
	Log.serial = logsup.serial;

	/*
	 * find the end of log
	 */
	logend = findEndOfLog();
	if (logend < 0) {
		fsck_send_msg(lrdo_LOGEND, logend);
		fsck_send_msg(lrdo_LOGENDBAD1);
		logError(LOGEND, 0);
		ujfs_swap_logsuper(&logsup);
		/*
		 * NOTE(review): LOGPSIZE bytes are written starting at &logsup,
		 * while the read above used sizeof (struct logsuper) -- confirm
		 * that the structure really occupies a full log page.
		 */
		rc = ujfs_rw_diskblocks(Log.fp,
					(Log.xaddr + LOGPNTOB(LOGSUPER_B)),
					(unsigned long) LOGPSIZE, (char *) &logsup, PUT);
		/* the findEndOfLog() failure is what gets reported, not the write result */
		rc = logend;
		goto error_out;
	}

	/*
	 * allocate/initialize logredo runtime data structures and
	 * initialize each file system associated with the log based on
	 * the contents of its superblock
	 */
	if ((rc = logredoInit()) != 0) {
		fsck_send_msg(lrdo_INITFAILED, rc, errno);
		goto error_out;
	}

	highest_lr_byte = logsup.size * LOGPSIZE - LOGRDSIZE;

	if ((logend < lowest_lr_byte) || (logend > highest_lr_byte)) {
		fsck_send_msg(lrdo_LOGEND, logend);
		fsck_send_msg(lrdo_LOGENDBAD2);
		rc = INVALID_LOGEND;
		goto error_out;
	}

	/*
	 * replay log
	 *
	 * read log backwards and process records as we go.
	 * reading stops at place specified by first SYNCPT we
	 * encounter.
	 */
	nlogrecords = lastaddr = 0;
	nextaddr = logend;
	do {
		logaddr = nextaddr;
		nextaddr = logRead(logaddr, &ld, afterdata);
		DBG_TRACE(("Logaddr=%x\nNextaddr=%x\n", logaddr, nextaddr))
		nlogrecords += 1;

		/*
		 *
		 * Validate the nextaddr as much as possible
		 *
		 */
		if (nextaddr < 0) {
			fsck_send_msg(lrdo_NEXTADDRINVALID);
			rc = nextaddr;
			goto error_out;
		}

		if ((nextaddr < lowest_lr_byte)
		    || (nextaddr > highest_lr_byte)) {
			fsck_send_msg(lrdo_NEXTADDROUTRANGE, nextaddr);
			rc = INVALID_NEXTADDR;
			goto error_out;
		}

		if (nextaddr == logaddr) {
			fsck_send_msg(lrdo_NEXTADDRSAME, nextaddr);
			rc = NEXTADDR_SAME;
			goto error_out;
		}

		/* reading backwards, a larger address means we wrapped around */
		if (nextaddr > logaddr) {
			if (log_has_wrapped) {
				fsck_send_msg(lrdo_LOGWRAPPED);
				rc = LOG_WRAPPED_TWICE;
				goto error_out;
			} else {
				log_has_wrapped = -1;
			}
		}

		/*
		 *
		 * The addresses seem ok. Process the current record.
		 * Every case either leaves rc == 0 or jumps to error_out,
		 * so no further rc check is needed after the switch.
		 *
		 */
		switch (ld.type) {
		case LOG_COMMIT:
			rc = doCommit(&ld);
			if (rc) {
				fsck_send_msg(lrdo_BADCOMMIT, logaddr);
				goto error_out;
			}
			break;
		case LOG_MOUNT:
			fsck_send_msg(lrdo_MOUNTRECORD, logaddr);
			rc = doMount(&ld);
			if (rc) {
				fsck_send_msg(lrdo_BADMOUNT, logaddr);
				goto error_out;
			}
			break;
		case LOG_SYNCPT:
			fsck_send_msg(lrdo_SYNCRECORD, logaddr);
			rc = 0;
			/* only the first (latest in time) SYNCPT sets the stop address */
			if (lastaddr == 0) {
				syncrecord = logaddr;
				lastaddr = (ld.log.syncpt.sync == 0)
				    ? logaddr : ld.log.syncpt.sync;
			}
			break;
		case LOG_REDOPAGE:
			DBG_TRACE(("jfs_logredo:Case Log_redoPage"))
			rc = doAfter(&ld, logaddr);
			if (rc) {
				fsck_send_msg(lrdo_BADREDOPAGE, logaddr);
				goto error_out;
			}
			break;
		case LOG_NOREDOPAGE:
			DBG_TRACE(("jfs_logredo:Case Log_noredopage"))
			rc = doNoRedoPage(&ld);
			if (rc) {
				fsck_send_msg(lrdo_BADNOREDOPAGE, logaddr);
				goto error_out;
			}
			break;
		case LOG_NOREDOINOEXT:
			DBG_TRACE(("jfs_logredo:Case Log_noredoinoext"))
			rc = doNoRedoInoExt(&ld);
			if (rc) {
				fsck_send_msg(lrdo_BADNOREDOINOEXT, logaddr);
				goto error_out;
			}
			break;
		case LOG_UPDATEMAP:
			rc = doUpdateMap(&ld);
			if (rc) {
				fsck_send_msg(lrdo_BADUPDATEMAP, logaddr);
				goto error_out;
			}
			break;
		default:
			fsck_send_msg(lrdo_UNKNOWNTYPE, logaddr);
			rc = UNRECOG_LOGRECTYP;
			goto error_out;
		}

		/*
		 * If the transaction just completed was the last
		 * for the current transaction, then flush the
		 * buffers.
		 */
		if (end_of_transaction != 0) {
			for (k = 1; k < NBUFPOOL; k++) {
				if ((rc = bflush(k, &buffer[k - 1])) != 0)
					goto error_out;
			}
			end_of_transaction = 0;
		}

	} while (logaddr != lastaddr);

	/*
	 * If any 'dtpage extend' records were processed, then we need
	 * to go back and rebuild their freelists. This cannot be done
	 * when the 'dtpage extend' record is processed, since there may
	 * be records processed later which affect the previous (shorter)
	 * version of the dtpage. Only after all these records are processed
	 * can we safely and accurately rebuild the freelist.
	 */
	if (numExtDtPg != 0) {
		rc = doExtDtPg();
		/* do not let the flush below overwrite a failure */
		if (rc != 0)
			goto error_out;
	}

	/*
	 * flush data page buffer cache
	 *
	 * A failed write must stop finalization: otherwise the superblock
	 * and log would be marked clean/redone although replayed pages never
	 * reached the disk. This matches the per-transaction flush above.
	 */
	for (k = 1; k < NBUFPOOL; k++) {
		if ((rc = bflush(k, &buffer[k - 1])) != 0)
			goto error_out;
	}

	/*
	 * finalize file systems
	 *
	 * update allocation map and superblock of file systems
	 * of volumes which are open if they were modified here.
	 * i.e. if they were not previously unmounted cleanly.
	 */
	for (k = 0; k < MAX_ACTIVE; k++) {
		if (vopen[k].state != VOPEN_OPEN)
			continue;

		if ((rc = updateMaps(k)) != 0) {
			fsck_send_msg(lrdo_ERRORCANTUPDMAPS);
			goto error_out;
		}

		/* Make sure all changes are committed to disk before we
		 * mark the superblock clean
		 */
		ujfs_flush_dev(vopen[k].fp);

		if ((rc = updateSuper(k)) != 0) {
			fsck_send_msg(lrdo_ERRORCANTUPDFSSUPER);
			goto error_out;
		}

		/* sync superblock before journal is finalized */
		ujfs_flush_dev(vopen[k].fp);
	}

	/*
	 * finalize log.
	 *
	 * clear active list.
	 * If this is a fully replayed log then it can be moved to earlier
	 * versions of the operating system. Therefore switch the magic
	 * number to the earliest level.
	 */
	if (logsup.state != LOGREADERR) {
		for (k = 0; k < MAX_ACTIVE; k++)
			uuid_clear(logsup.active[k]);

		logsup.end = logend;
		logsup.state = LOGREDONE;
		logsup.magic = LOGMAGIC;
	}
	ujfs_swap_logsuper(&logsup);
	rc = ujfs_rw_diskblocks(Log.fp, (Log.xaddr + LOGPNTOB(LOGSUPER_B)),
				LOGPSIZE, (char *) &logsup, PUT);

	/*
	 * now log some info for the curious
	 */
	fsck_send_msg(lrdo_LOGEND, logend);
	fsck_send_msg(lrdo_RPTSYNCNUM, syncrecord);
	fsck_send_msg(lrdo_RPTSYNCADDR, lastaddr);
	fsck_send_msg(lrdo_RPTNUMLOGREC, nlogrecords);
	fsck_send_msg(lrdo_RPTNUMDOBLK, numdoblk);
	fsck_send_msg(lrdo_RPTNUMNODOBLK, numnodofile);

error_out:
	/* errors leave through here as negative values */
	if (rc > 0) {
		rc = rc * (-1);
	}

	/*
	 * If everything went ok except that we didn't have
	 * enough memory to deal with the block map, tell chkdsk
	 * to be sure to do a full check and repair, but that a log
	 * format is not necessary
	 */
	if ((rc == 0) && Insuff_memory_for_maps) {
		rc = ENOMEM25;
	}

	return (rc);
}
/*
 * NAME: doMount(ld)
 *
 * FUNCTION: process a LOG_MOUNT record. The mount record is the
 *	first-in-time record written to the log, so it is the last
 *	one reached when replaying backwards. Unless the volume named
 *	by the record is already flagged FM_LOGREDO or FM_DIRTY, its
 *	vopen[] status is set to FM_CLEAN.
 *
 * PARAMETER: ld - log record descriptor; ld->aggregate selects the
 *	vopen[] entry.
 *
 * RETURNS: always 0.
 */
int doMount(struct lrd *ld)
{
	int aggr = ld->aggregate;
	int fs_status = vopen[aggr].status;

	DBG_TRACE(("Logredo:domount: status=%d\n", fs_status))

	if ((fs_status & (FM_LOGREDO | FM_DIRTY)) == 0)
		vopen[aggr].status = FM_CLEAN;

	return 0;
}
/*
 * NAME: openVol(vol)
 *
 * FUNCTION: open the aggregate/volume specified.
 *	check if it was cleanly unmounted. also check log
 *	serial number. initialize disk and inode maps.
 *
 * PARAMETER: vol - index of the volume in vopen[]
 *
 * RETURNS: 0 when the volume is either cleanly unmounted (state
 *	VOPEN_CLOSED, nothing to replay) or successfully opened
 *	(state VOPEN_OPEN); otherwise a non-zero error code.
 */
int openVol(int vol)
{
	int rc, l2agsize, agsize;
	int64_t fssize;		/* number of aggr blks in the aggregate/lv */
	struct superblock sb;
	int aggsb_numpages;

	/* with an external log the volume still has to be located and opened */
	if (Log.location & OUTLINELOG) {
		/* First check if this is the already opened volume */
		if (!uuid_compare(vopen[vol].uuid, primary_vol.uuid))
			vopen[vol].fp = primary_vol.fp;
		else {
			vopen[vol].fp = open_by_label(vopen[vol].uuid, 0, 0,
						      NULL, NULL);
			if (vopen[vol].fp == NULL)
				return ENOENT;
		}
	}

	/* read superblock of the aggregate/volume */
	if ((rc = rdwrSuper(vopen[vol].fp, &sb, PB_READ)) != 0) {
		fsck_send_msg(lrdo_CANTREADFSSUPER);
		fsError(READERR, vol, SUPER1_B);
		vopen[vol].state = VOPEN_CLOSED;
		return (FSSUPER_READERROR1);
	}

	/* check magic number and initialize version specific
	 * values in the vopen struct for this vol.
	 */
	if (strncmp(sb.s_magic, JFS_MAGIC, (unsigned) strlen(JFS_MAGIC))) {
		fsck_send_msg(lrdo_FSSUPERBADMAGIC);
		vopen[vol].state = VOPEN_CLOSED;
		return (LOGSUPER_BADMAGIC);
	}

	if (sb.s_version > JFS_VERSION) {
		/* NOTE(review): reports the "bad magic" message for a version mismatch */
		fsck_send_msg(lrdo_FSSUPERBADMAGIC);
		vopen[vol].state = VOPEN_CLOSED;
		return (LOGSUPER_BADVERSION);
	}

	/*
	 * A volume attached to an external log must not claim an inline log.
	 * The flag has to be masked first and then compared; comparing the
	 * constant with itself would only test bit 0 of s_flag.
	 */
	if ((Log.location & OUTLINELOG)
	    && ((sb.s_flag & JFS_INLINELOG) == JFS_INLINELOG)) {
		fsck_send_msg(lrdo_FSSUPERBADLOGLOC);
		vopen[vol].state = VOPEN_CLOSED;
		return (LOGSUPER_BADLOGLOC);
	}

	vopen[vol].lblksize = sb.s_bsize;
	vopen[vol].l2bsize = sb.s_l2bsize;
	vopen[vol].l2bfactor = sb.s_l2bfactor;
	fssize = sb.s_size >> sb.s_l2bfactor;
	vopen[vol].fssize = fssize;
	vopen[vol].agsize = sb.s_agsize;

	/* LOG2NUM alters its first argument, so hand it a scratch copy */
	agsize = vopen[vol].agsize;
	LOG2NUM(agsize, l2agsize);

	/* number of allocation groups, rounding up for a partial last one */
	vopen[vol].numag = fssize >> l2agsize;
	if (fssize & (vopen[vol].agsize - 1))
		vopen[vol].numag += 1;
	vopen[vol].l2agsize = l2agsize;

	if (Log.location & INLINELOG) {
		/*
		 * Now that the aggregate superblock has been read, do some
		 * more validation of the log superblock
		 */
		if (logsup.bsize != vopen[vol].lblksize) {
			fsck_send_msg(lrdo_LOGSUPBADBLKSZ);
			return JFS_BLKSIZE_ERROR;
		}

		if (logsup.l2bsize != vopen[vol].l2bsize) {
			fsck_send_msg(lrdo_LOGSUPBADL2BLKSZ);
			return JFS_L2BLKSIZE_ERROR;
		}

		/* log length from the fs superblock, converted to log pages */
		aggsb_numpages = lengthPXD(&sb.s_logpxd) * logsup.bsize / LOGPSIZE;
		if (logsup.size != aggsb_numpages) {
			fsck_send_msg(lrdo_LOGSUPBADLOGSZ);
			return JFS_LOGSIZE_ERROR;
		}
	}

	/*
	 * set lbperpage in vopen.
	 */
	vopen[vol].lbperpage = PSIZE >> vopen[vol].l2bsize;

	/*
	 * was it cleanly umounted ?
	 */
	if (sb.s_state == FM_CLEAN) {
		vopen[vol].status = FM_CLEAN;
		vopen[vol].state = VOPEN_CLOSED;
		return (0);
	}

	/*
	 * get status of volume
	 */
	vopen[vol].status = sb.s_state;
	vopen[vol].is_fsdirty = (sb.s_state & FM_DIRTY);

	/*
	 * check log serial number
	 */
	if (sb.s_logserial != Log.serial) {
		fsck_send_msg(lrdo_FSSUPERBADLOGSER);
		vopen[vol].state = VOPEN_CLOSED;
		fsError(SERIALNO, vol, SUPER1_B);
		return (LOGSUPER_BADSERIAL);
	}

	/* initialize the disk and inode maps
	 */
	if ((rc = initMaps(vol)) != 0) {
		fsck_send_msg(lrdo_INITMAPSFAIL);
		fsError(MAPERR, vol, 0);
		return (rc);
	}

	vopen[vol].state = VOPEN_OPEN;
	return 0;
}
/*
 * NAME: updateSuper(vol)
 *
 * FUNCTION: re-read the superblock of volume vopen[vol], set its
 *	s_state from the status recorded in vopen[], and write it
 *	back. A status carrying FM_DIRTY or FM_LOGREDO is stored
 *	with FM_EXTENDFS cleared; any other status becomes FM_CLEAN.
 *
 * PARAMETER: vol - index of the volume in vopen[]
 *
 * RETURNS: 0 on success, FSSUPER_READERROR2 if the superblock
 *	cannot be read, otherwise the write error from rdwrSuper().
 */
int updateSuper(int vol)
{
	struct superblock sb;
	int vol_status;
	int rc;

	/* fetch the current on-disk superblock */
	rc = rdwrSuper(vopen[vol].fp, &sb, PB_READ);
	if (rc != 0) {
		fsck_send_msg(lrdo_READFSSUPERFAIL);
		return FSSUPER_READERROR2;
	}

	/* derive the new state from what logredo recorded for the volume */
	vol_status = vopen[vol].status;
	if (!(vol_status & (FM_DIRTY | FM_LOGREDO)))
		sb.s_state = FM_CLEAN;
	else
		sb.s_state = vol_status & ~FM_EXTENDFS;

	/* write it back; a failure is reported and passed to the caller */
	rc = rdwrSuper(vopen[vol].fp, &sb, PB_UPDATE);
	if (rc != 0)
		fsck_send_msg(lrdo_WRITEFSSUPERFAIL);

	return rc;
}
/*
 * NAME: rdwrSuper(fp, sb, rwflag)
 *
 * FUNCTION: read or write the superblock for the file system described
 *	by the stream of the opened aggregate/lv.
 *
 *	Exactly one superblock copy is accessed: the secondary one
 *	when use_2ndary_agg_superblock is set, the primary one
 *	otherwise. No fallback to the other copy is attempted here.
 *
 * PARAMETERS:
 *	fp     - open stream of the aggregate/lv
 *	sb     - for PB_READ, receives the superblock in host byte order;
 *	         otherwise, the superblock to write (left unmodified)
 *	rwflag - PB_READ to read; any other value (PB_UPDATE) writes
 *
 * RETURNS: 0 on success; CANTREAD_PRIMFSSUPER / CANTREAD_2NDFSSUPER or
 *	CANTWRITE_PRIMFSSUPER / CANTWRITE_2NDFSSUPER on I/O failure.
 */
int rdwrSuper(FILE *fp, struct superblock * sb, int32_t rwflag)
{
	int rc;
	uint64_t super_offset;
	union {
		struct superblock super;
		char block[PSIZE];
	} super;

	/*
	 * the aggregate/lv logical block size is not known yet at this
	 * point, so the superblock is addressed by its fixed byte offset.
	 */
	if (use_2ndary_agg_superblock) {
		super_offset = SUPER2_OFF;
	} else {
		super_offset = SUPER1_OFF;
	}

	if (rwflag == PB_READ) {
		/*
		 * read super block
		 */
		rc = ujfs_rw_diskblocks(fp, super_offset,
					(unsigned) SIZE_OF_SUPER, super.block, GET);
		if (rc != 0) {
			if (!use_2ndary_agg_superblock) {
				fsck_send_msg(lrdo_READFSPRIMSBFAIL);
				return (CANTREAD_PRIMFSSUPER);
			} else {
				fsck_send_msg(lrdo_READFS2NDSBFAIL);
				return (CANTREAD_2NDFSSUPER);
			}
		}

		*sb = super.super;
		ujfs_swap_superblock(sb);
	} else {		/* PB_UPDATE */
		/*
		 * write superblock
		 *
		 * Zero the whole I/O buffer first: SIZE_OF_SUPER bytes are
		 * written, but only the struct superblock part is filled in
		 * below. Without this, uninitialized stack bytes would be
		 * written to disk behind the structure.
		 */
		memset(&super, 0, sizeof (super));
		super.super = *sb;
		ujfs_swap_superblock(&super.super);

		/*
		 * write whichever superblock we're working with.
		 * chkdsk will take care of replicating it.
		 */
		rc = ujfs_rw_diskblocks(fp, super_offset,
					(unsigned) SIZE_OF_SUPER, super.block, PUT);
		if (rc != 0) {
			if (!use_2ndary_agg_superblock) {
				fsck_send_msg(lrdo_WRITEFSPRIMSBFAIL);
				return (CANTWRITE_PRIMFSSUPER);
			} else {
				fsck_send_msg(lrdo_WRITEFS2NDSBFAIL);
				return (CANTWRITE_2NDFSSUPER);
			}
		}
	}

	return (0);
}
/*
 * NAME: bflush()
 *
 * FUNCTION: write a cache buffer back to its volume if it has been
 *	modified. The number of bytes written comes from the length
 *	field of the pxd in the buffer header, so a dtree page that
 *	is shorter than 4k is written only as far as needed.
 *
 * PARAMETERS:
 *	k   - index in bufhdr[] of the header describing buf
 *	buf - the buffer pool page to write
 *
 * RETURNS: 0 if the buffer was clean or was written successfully
 *	(the modify flag is then cleared); BFLUSH_WRITEERROR if the
 *	write failed.
 */
int bflush(int32_t k,	/* The index in bufhdr that describes buf */
	   struct bufpool *buf)
{				/* pointer to buffer pool page */
	FILE *dev;
	int32_t agg;
	int32_t len_bytes;
	int64_t first_blk;

	/* clean buffer: nothing to write */
	if (!bufhdr[k].modify)
		return 0;

	/* locate the owning volume and the on-disk extent of the page */
	agg = bufhdr[k].vol;
	dev = vopen[agg].fp;
	first_blk = addressPXD(&bufhdr[k].pxd);
	len_bytes = lengthPXD(&bufhdr[k].pxd) << vopen[agg].l2bsize;

	if (ujfs_rw_diskblocks(dev,
			       (uint64_t) (first_blk << vopen[agg].l2bsize),
			       (unsigned) len_bytes, (char *) buf, PUT) != 0) {
		fsck_send_msg(lrdo_BUFFLUSHFAIL);
		return BFLUSH_WRITEERROR;
	}

	bufhdr[k].modify = 0;
	return 0;
}
/*
 * NAME:	findLog()
 *
 * FUNCTION:	open the device to see if it's a valid filesystem
 *		or journal.  If it is a filesystem, determine whether
 *		the log is inline or external.  If external, find
 *		the log device: first the one named in log_device (if any),
 *		then by the log uuid recorded in the superblock.
 *
 * PARAMETERS:	fp	- open device given by the caller
 *		in_use	- cleared on entry; set to 1 when the exclusive open
 *			  of log_device fails, and passed on to
 *			  open_by_label() for the lookup by uuid
 *
 * RETURNS:	0			- Log has been filled in
 *		NOT_FSDEV_ERROR		- device is neither a file system nor
 *					  a journal (or could not be read)
 *		NOT_INLINELOG_ERROR	- file system with an external log
 *					  that could not be located
 */
int findLog(FILE *fp, int *in_use)
{
	struct logsuper logsup;
	struct superblock sb;
	int rc;

	*in_use = 0;

	/*
	 * try the LV as file system with in-line log
	 */
	if (rdwrSuper(fp, &sb, PB_READ)) {
		fsck_send_msg(lrdo_NOTAFSDEV);
		return NOT_FSDEV_ERROR;
	}

	/*
	 * is the LV a file system ?
	 */
	if (memcmp(sb.s_magic, JFS_MAGIC, sizeof (sb.s_magic)) == 0) {
		/*
		 * does file system contains its in-line log ?
		 */
		if ((sb.s_flag & JFS_INLINELOG) == JFS_INLINELOG) {
			Log.location = INLINELOG;
			Log.fp = fp;
			Log.l2bsize = sb.s_l2bsize;
			Log.xaddr = addressPXD(&sb.s_logpxd) << sb.s_l2bsize;

			/* vopen[0] represents fs if inline log */
			vopen[0].status = sb.s_state;
			vopen[0].fp = fp;

			return 0;
		}

		/* Save fp and uuid */
		primary_vol.fp = fp;
		uuid_copy(primary_vol.uuid, sb.s_uuid);

		/*
		 * External log
		 *
		 * First check device specified on
		 * command line
		 */
		Log.xaddr = 0;
		if (log_device[0]) {
			Log.fp = NULL;
			if (LogOpenMode != O_RDONLY) {
				Log.fp = fopen_excl(log_device, "r+");
				if (Log.fp == NULL)
					*in_use = 1;
			}
			if (Log.fp == NULL) {
				Log.fp = fopen(log_device, "r");
				if (Log.fp == NULL) {
					printf("Invalid journal specified (%s)\n",
					       log_device);
					goto by_uuid;
				}
			}

			/*
			 * Only look at the log superblock if it was really
			 * read; after a failed read logsup holds no data.
			 */
			rc = ujfs_rw_diskblocks(Log.fp, LOGPNTOB(LOGSUPER_B),
						sizeof (struct logsuper), &logsup, GET);
			if (rc == 0)
				ujfs_swap_logsuper(&logsup);

			if ((rc != 0) || (logsup.magic != LOGMAGIC) ||
			    (uuid_compare(logsup.uuid, sb.s_loguuid))) {
				/* not the journal of this file system */
				fclose(Log.fp);
				*in_use = 0;
				goto by_uuid;
			}
			Log.location = OUTLINELOG;
			return 0;
		}
	      by_uuid:
		Log.fp = open_by_label(sb.s_loguuid, 0, 1, NULL, in_use);

		if (Log.fp != NULL) {
			/* NOTE(review): '|=' here, plain '=' on the other paths - confirm intent */
			Log.location |= OUTLINELOG;
			return 0;
		}

		return NOT_INLINELOG_ERROR;
	}

	/*
	 * is this an external log?
	 *
	 * A device whose log superblock cannot be read is reported the
	 * same way as one without the log magic number.
	 */
	rc = ujfs_rw_diskblocks(fp, LOGPNTOB(LOGSUPER_B), sizeof (struct logsuper), &logsup, GET);
	if (rc == 0)
		ujfs_swap_logsuper(&logsup);
	if ((rc != 0) || (logsup.magic != LOGMAGIC)) {
		fsck_send_msg(lrdo_NOTAFSDEV);
		return NOT_FSDEV_ERROR;
	}
	Log.fp = fp;
	Log.location = OUTLINELOG;

	return 0;
}
extern void exit(int);
/*
 * NAME:	fsError(type,vol,bn)
 *
 * FUNCTION:	record and report an error on the specified
 *		aggregate/lv (filesystem).
 *
 *		Announces the failing volume, sets the global retcode to -1,
 *		sets the volume's status in vopen[] to FM_LOGREDO and then
 *		issues the message that belongs to the error type.  Types
 *		not listed below produce no additional message.
 *
 * PARAMETERS:	type	- error type
 *		vol	- the minor number of the aggregate/lv; used as index
 *			  into vopen[]
 *		bn	- aggregate block No. (for INOTYPE it is reported as
 *			  the inode number); ignored by the types whose
 *			  message has no argument
 *
 * RETURNS:	always 0
 */
int fsError(int type, int vol, int64_t bn)
{
	const long long reported = (long long) bn;

	fsck_send_msg(lrdo_ERRORONVOL, vol);
	retcode = -1;
	vopen[vol].status = FM_LOGREDO;

	switch (type) {
	/* errors reported without a block/inode number */
	case OPENERR:
		fsck_send_msg(lrdo_OPENFAILED);
		break;
	case MAPERR:
		fsck_send_msg(lrdo_CANTINITMAPS);
		break;
	case SERIALNO:
		fsck_send_msg(lrdo_BADLOGSER);
		break;

	/* errors reported together with bn */
	case DBTYPE:
		fsck_send_msg(lrdo_BADDISKBLKNUM, reported);
		break;
	case INOTYPE:
		fsck_send_msg(lrdo_BADINODENUM, reported);
		break;
	case READERR:
		fsck_send_msg(lrdo_CANTREADBLK, reported);
		break;
	case IOERROR:
		fsck_send_msg(lrdo_IOERRREADINGBLK, reported);
		break;
	case LOGRCERR:
		fsck_send_msg(lrdo_BADUPDMAPREC, reported);
		break;

	default:
		break;
	}

	return 0;
}
/*
 * NAME:	logError(type, logaddr)
 *
 * FUNCTION:	error handling for log read errors.
 *
 *		Sets the global retcode to -1 and the log superblock state
 *		to LOGREADERR, issues the message that belongs to the error
 *		type, then sets the status of every open volume that is not
 *		FM_CLEAN to FM_LOGREDO.
 *
 * PARAMETERS:	type	- error type; unlisted types produce no message
 *		logaddr	- log address passed to the READERR, UNKNOWNR and
 *			  IOERROR messages
 *
 * RETURNS:	always 0
 */
int logError(int type, int logaddr)
{
	int vol;

	retcode = -1;
	logsup.state = LOGREADERR;

	switch (type) {
	/* messages without an address */
	case LOGEND:
		fsck_send_msg(lrdo_FINDLOGENDFAIL);
		break;
	case LOGWRAP:
		fsck_send_msg(lrdo_LOGWRAP);
		break;

	/* messages that report the log address */
	case READERR:
		fsck_send_msg(lrdo_LOGREADFAIL, logaddr);
		break;
	case UNKNOWNR:
		fsck_send_msg(lrdo_UNRECOGTYPE, logaddr);
		break;
	case IOERROR:
		fsck_send_msg(lrdo_IOERRONLOG, logaddr);
		break;

	default:
		break;
	}

	/* flag the open volumes; clean ones are left alone */
	for (vol = 0; vol < MAX_ACTIVE; vol++) {
		if (vopen[vol].state != VOPEN_OPEN)
			continue;
		if (vopen[vol].status == FM_CLEAN)
			continue;
		vopen[vol].status = FM_LOGREDO;
	}

	return 0;
}
/*
* recoverExtendFS()
*
* function: recover crash while in extendfs() for inline log;
*
* note: fs superblock fields remains pre-extendfs state,
* while that bmap file, fsck and inline log area may be in
* unknown state;
*
* at entry, only log type/lv has been validated;
* for inline log: vopen[0], fs fp = log fp;
*/
static int recoverExtendFS(FILE *fp)
{
struct superblock *sbp;
struct dinode *dip1, *dip2;
struct dbmap *bgcp;
xtpage_t *p;
int64_t lmchild = 0, xaddr, xoff, barrier, t64, agsize;
uint8_t lmxflag;
int32_t i;
char *dip, *bp;
pxd_t temp_pxd;
/*
* read bmap global control page
*/
/* read superblock yet again */
sbp = (struct superblock *) &buffer[0];
if (rdwrSuper(fp, sbp, PB_READ))
goto errout;
/* read primary block allocation map inode */
dip = (char *) &buffer[1];
if (ujfs_rw_diskblocks(fp, AITBL_OFF, PSIZE, dip, GET)) {
fsck_send_msg(lrdo_EXTFSREADFSSUPERFAIL);
goto errout;
}
/* locate the inode in the buffer page */
dip1 = (struct dinode *) dip;
dip1 += BMAP_I;
bp = (char *) &buffer[2]; /* utility buffer */
/* start from root in dinode */
p = (xtpage_t *) & dip1->di_btroot;
/* is this page leaf ? */
if (p->header.flag & BT_LEAF)
goto rdbgcp;
/* traverse down leftmost child node to leftmost leaf of xtree */
do {
/* read in the leftmost child page */
t64 = addressXAD(&p->xad[XTENTRYSTART]) << sbp->s_l2bsize;
if (ujfs_rw_diskblocks(fp, t64, PSIZE, bp, GET)) {
fsck_send_msg(lrdo_EXTFSREADBLKMAPINOFAIL);
goto errout;
}
p = (xtpage_t *) bp;
/* is this page leaf ? */
if (p->header.flag & BT_LEAF)
break;
} while (1);
rdbgcp:
t64 = addressXAD(&p->xad[XTENTRYSTART]) << sbp->s_l2bsize;
if (ujfs_rw_diskblocks(fp, t64, PSIZE, bp, GET)) {
fsck_send_msg(lrdo_EXTFSREADBLKFAIL1, (long long) t64);
goto errout;
}
bgcp = (struct dbmap *) bp;
/*
* recover to pre- or post-extendfs state ?:
*/
if (__le64_to_cpu(bgcp->dn_mapsize) > (sbp->s_size >> sbp->s_l2bfactor)) {
agsize = __le64_to_cpu(bgcp->dn_agsize);
goto postx;
}
/*
* recover pre-extendfs state
*/
/*
* reset block allocation map inode (xtree root)
*/
/* read 2ndary block allocation map inode */
t64 = addressPXD(&sbp->s_ait2) << sbp->s_l2bsize;
if (ujfs_rw_diskblocks(fp, t64, PSIZE, bp, GET)) {
fsck_send_msg(lrdo_EXTFSREADBLKFAIL2, (long long) t64);
goto errout;
}
dip2 = (struct dinode *) bp;
dip2 += BMAP_I;
/*
* Reset primary bam inode with 2ndary bam inode
*
* Not forgetting to reset di_ixpxd since they are in different
* inode extents.
*/
memcpy((void *) &temp_pxd, (void *) &(dip1->di_ixpxd), sizeof (pxd_t));
memcpy(dip1, dip2, DISIZE);
memcpy((void *) &(dip1->di_ixpxd), (void *) &temp_pxd, sizeof (pxd_t));
if (ujfs_rw_diskblocks(fp, AITBL_OFF, PSIZE, dip, PUT)) {
fsck_send_msg(lrdo_EXTFSWRITEBLKFAIL1, AITBL_OFF);
goto errout;
}
/*
* backout bmap file to fs size:
*
* trim xtree to range specified by i_size:
* xtree has been grown in append mode and
* written from right to left, bottom-up;
*/
barrier = __le64_to_cpu(dip1->di_size) >> sbp->s_l2bsize;
/* start with root */
xaddr = 0;
p = (xtpage_t *) & dip1->di_btroot;
lmxflag = p->header.flag;
p->header.next = 0;
if (lmxflag & BT_INTERNAL) {
/* save leftmost child xtpage xaddr */
lmchild = addressXAD(&p->xad[XTENTRYSTART]);
}
/*
* scan each level of xtree via leftmost descend
*/
while (1) {
/*
* scan each xtpage of current level of xtree
*/
while (1) {
/*
* scan each xad in current xtpage
*/
for (i = XTENTRYSTART; i < p->header.nextindex; i++) {
/* test if extent is of interest */
xoff = offsetXAD(&p->xad[i]);
if (xoff < barrier)
continue;
/*
* barrier met in current page
*/
assert(i > XTENTRYSTART);
/* update current page */
p->header.nextindex = i;
if (xaddr) {
/* discard further right sibling
* pages
*/
p->header.next = 0;
if (ujfs_rw_diskblocks(fp, t64, PSIZE, p, PUT)) {
fsck_send_msg(lrdo_EXTFSWRITEBLKFAIL2, (long long) t64);
goto errout;
}
}
goto nextLevel;
} /* end for current xtpage scan */
/* barrier was not met in current page */
/* read in next/right sibling xtpage */
xaddr = p->header.next;
if (xaddr) {
if (xaddr >= barrier) {
p->header.next = 0;
if (ujfs_rw_diskblocks(fp, t64, PSIZE, p, PUT)) {
fsck_send_msg(lrdo_EXTFSWRITEBLKFAIL3, (long long) t64);
break;
}
}
t64 = xaddr << sbp->s_l2bsize;
if (ujfs_rw_diskblocks(fp, t64, PSIZE, bp, GET)) {
fsck_send_msg(lrdo_EXTFSREADBLKFAIL3, (long long) t64);
goto errout;
}
p = (xtpage_t *) bp;
} else
break;
} /* end while current level scan */
/*
* descend: read leftmost xtpage of next lower level of xtree
*/
nextLevel:
if (lmxflag & BT_INTERNAL) {
/* get the leftmost child page */
xaddr = lmchild;
t64 = xaddr << sbp->s_l2bsize;
if (ujfs_rw_diskblocks(fp, t64, PSIZE, bp, GET)) {
fsck_send_msg(lrdo_EXTFSREADBLKFAIL4, (long long) t64);
goto errout;
}
p = (xtpage_t *) bp;
lmxflag = p->header.flag;
if (lmxflag & BT_INTERNAL) {
/* save leftmost child xtpage xaddr */
lmchild = addressXAD(&p->xad[XTENTRYSTART]);
}
} else
break;
} /* end while level scan */
/*
* reconstruct map;
*
* readBmap() init blocks beyond fs size in the last
* partial dmap page as allocated which might have been
* marked as free by extendfs();
*/
/* fake log opend/validated */
Log.serial = sbp->s_logserial;
/*
* reconstruct maps
*/
/* open LV and initialize maps */
if (logredoInit()) {
fsck_send_msg(lrdo_EXTFSINITLOGREDOFAIL);
goto errout;
}
/* bypass log replay */
/* update/write maps */
updateMaps(0);
/*
* reformat log
*
* request reformat original log (which might have been
* overwritten by extendfs() and set superblock clean
*/
jfs_logform(fp, sbp->s_bsize, sbp->s_l2bsize, sbp->s_flag,
addressPXD(&sbp->s_logpxd), lengthPXD(&sbp->s_logpxd), NULL, NULL);
/* update superblock */
updateSuper(0);
fsck_send_msg(lrdo_REXTNDTOPRE);
return 0;
/*
* recover post-extendfs state
*/
postx:
/*
* update 2ndary bam inode
*/
/* read 2ndary block allocation map inode */
t64 = addressPXD(&sbp->s_ait2) << sbp->s_l2bsize;
if (ujfs_rw_diskblocks(fp, t64, PSIZE, bp, GET)) {
fsck_send_msg(lrdo_EXTFSREADBLKFAIL5, (long long) t64);
goto errout;
}
dip2 = (struct dinode *) bp;
dip2 += BMAP_I;
/*
* Reset 2ndary bam inode with primary bam inode
* Not forgetting to reset di_ixpxd since they are in different
* inode extents.
*/
memcpy((void *) &temp_pxd, (void *) &(dip2->di_ixpxd), sizeof (pxd_t));
memcpy(dip2, dip1, DISIZE);
memcpy((void *) &(dip2->di_ixpxd), (void *) &temp_pxd, sizeof (pxd_t));
if (ujfs_rw_diskblocks(fp, t64, PSIZE, bp, PUT)) {
fsck_send_msg(lrdo_EXTFSWRITEBLKFAIL4, (long long) t64);
goto errout;
}
/*
* update superblock
*/
if (!(sbp->s_state & (FM_DIRTY | FM_LOGREDO)))
sbp->s_state = FM_CLEAN;
else
sbp->s_state &= ~FM_EXTENDFS;
sbp->s_size = sbp->s_xsize;
sbp->s_agsize = agsize;
sbp->s_fsckpxd = sbp->s_xfsckpxd;
sbp->s_fscklog = 0;
sbp->s_logpxd = sbp->s_xlogpxd;
sbp->s_logserial = 1;
if (rdwrSuper(fp, sbp, PB_UPDATE)) {
fsck_send_msg(lrdo_EXTFSWRITEFSSUPERFAIL);
goto errout;
}
/*
* finalize log
*
* note: new log is valid;
*/
/* read log superblock */
t64 = (addressPXD(&sbp->s_logpxd) << sbp->s_l2bsize) + LOGPSIZE;
if (ujfs_rw_diskblocks(fp, t64, LOGPSIZE, &logsup, GET)) {
fsck_send_msg(lrdo_EXTFSREADLOGSUPFAIL);
goto errout;
}
logsup.end = findEndOfLog();
logsup.state = LOGREDONE;
if (ujfs_rw_diskblocks(fp, t64, LOGPSIZE, &logsup, PUT)) {
fsck_send_msg(lrdo_EXTFSWRITELOGSUPFAIL);
goto errout;
}
fsck_send_msg(lrdo_REXTNDTOPOST);
return 0;
errout:
fsck_send_msg(lrdo_REXTNDFAIL, errno);
return (EXTENDFS_FAILRECOV);
}
/*
 *
 * NAME:	alloc_dmap_bitrec
 *
 * FUNCTION:	This routine allocates memory for one struct dmap_bitmaps
 *		by calling the chkdsk alloc_wrksp() routine (because that
 *		will allocate high memory during autocheck).  If that fails
 *		then logredo cannot continue bmap processing, so it will set
 *		the Insuff_memory_for_maps flag and, unless that was already
 *		done, make the storage already allocated to the bmap
 *		available for other uses (see alloc_storage()).
 *
 * PARAMETERS:	dmap_bitrec	- receives the address of the new record;
 *				  set to NULL before the allocation attempt
 *
 * RETURNS:	always 0.  Failure is signalled through
 *		Insuff_memory_for_maps.
 */
int alloc_dmap_bitrec(struct dmap_bitmaps ** dmap_bitrec)
{
	int intermed_rc;

	*dmap_bitrec = NULL;

	intermed_rc = alloc_wrksp((uint32_t) (sizeof (struct dmap_bitmaps)),
				  0,	/* not meaningful from logredo */
				  -1,	/* I am logredo */
				  (void **) dmap_bitrec);

	if ((intermed_rc != 0) || ((*dmap_bitrec) == NULL)) {
		/*
		 * Hand the bmap storage over only once, and only if there
		 * is any: when it is already being cannibalized, pieces of
		 * it are in use by alloc_storage() callers and must not be
		 * cleared or handed out a second time.
		 */
		if ((!Insuff_memory_for_maps) && (bmap_stg_addr != NULL)) {
			available_stg_addr = bmap_stg_addr;
			available_stg_bytes = bmap_stg_bytes;
			/*
			 * initialize the storage for its new use
			 */
			memset((void *) available_stg_addr, 0, available_stg_bytes);
		}
		Insuff_memory_for_maps = -1;
	}

	return (0);
}				/* end alloc_dmap_bitrec() */
/*
 *
 * NAME:	alloc_storage
 *
 * FUNCTION:	This routine allocates memory by calling the chkdsk
 *		alloc_wrksp() routine (because that will allocate high
 *		memory during autocheck).  If that fails and the bmap
 *		was successfully allocated and there's enough of it left,
 *		this routine will return a piece of it.
 *
 * PARAMETERS:	size_in_bytes		- number of bytes wanted
 *		addr_stg_ptr		- receives the address of the storage
 *		bmap_stg_returned	- set to 0 when the storage came from
 *					  alloc_wrksp(), to -1 when it was
 *					  carved out of the bmap storage
 *
 * RETURNS:	0	- storage returned in *addr_stg_ptr
 *		ENOMEM0	- bmap storage is being reused but what is left of it
 *			  is smaller than size_in_bytes
 *		ENOMEM1	- no bmap storage is available for reuse
 */
int alloc_storage(int32_t size_in_bytes, void **addr_stg_ptr, int32_t * bmap_stg_returned)
{
	int intermed_rc;

	*bmap_stg_returned = 0;	/* assume we'll get it the usual way */
	*addr_stg_ptr = NULL;

	intermed_rc = alloc_wrksp((uint32_t) size_in_bytes, 0, -1, addr_stg_ptr);
	if ((intermed_rc == 0) && ((*addr_stg_ptr) != NULL))
		return (0);

	if ((!Insuff_memory_for_maps) && (bmap_stg_addr != NULL)) {
		/*
		 * we did allocate storage for the bmap
		 * and haven't started cannibalizing it yet
		 */
		Insuff_memory_for_maps = -1;
		available_stg_addr = bmap_stg_addr;
		available_stg_bytes = bmap_stg_bytes;
		/*
		 * initialize the storage for its new use
		 */
		memset((void *) available_stg_addr, 0, available_stg_bytes);
	}

	/*
	 * Both conditions must hold.  This is a logical test; a bitwise
	 * '&' would only give the right answer for flag values that have
	 * the lowest bit set.
	 */
	if (!(Insuff_memory_for_maps && (available_stg_bytes != 0)))
		return (ENOMEM1);

	/*
	 * we may be able to go on anyway
	 */
	if (available_stg_bytes < size_in_bytes) {
		/*
		 * not enough here
		 */
		return (ENOMEM0);
	}

	/* we can scavenge the memory we need */
	*addr_stg_ptr = available_stg_addr;
	available_stg_bytes -= size_in_bytes;
	available_stg_addr = (char *) (available_stg_addr + size_in_bytes);
	*bmap_stg_returned = -1;

	return (0);
}
#ifdef _JFS_WIP
/*
 *	nfsisloaded()
 *
 * check whether nfs is loaded by trying to load "/usr/sbin/probe".
 *
 * returns 1 when the load succeeds, 0 otherwise.  A failure other than
 * ENOEXEC is additionally reported through perror().
 */
static int nfsisloaded()
{
	int (*probe_entry) ();
	int load_errno;

	probe_entry = load("/usr/sbin/probe", 0, 0);
	if (probe_entry)
		return 1;

	if (errno != ENOEXEC) {
		/* keep errno intact across the trace call for perror() */
		load_errno = errno;
		DBG_TRACE(("%s: ", prog))
		errno = load_errno;
		perror("load");
		return 0;
	}

	DBG_TRACE(("%s: nfs is not loaded\n", prog))
	return 0;
}
#endif /* _JFS_WIP */
#ifdef _JFS_DEBUG
/*
 *	xdump()
 *
 * hex dump of memory through DBG_TRACE.
 *
 * saddr is first rounded down to a multiple of 4.  Each output line then
 * shows up to 16 bytes as four groups of 8 hex digits followed by their
 * printable-ASCII rendering ('.' for anything outside 0x20..0x7e).
 * A run of lines identical to the previous one is replaced by a single
 * " *" line, except for the final line, which is always printed.
 *
 * Bytes are consumed in groups of four, so the dump may read up to three
 * bytes beyond saddr + count.
 *
 * returns 0.
 */
int xdump(char *saddr, int count)
{
#define LINESZ		60
#define ASCIISTRT	40
#define HEXEND		36
	static const char hexchar[] = "0123456789ABCDEF";
	int i, j, k, hexdigit;
	int c;
	char linebuf[LINESZ + 1];
	char prevbuf[LINESZ + 1];
	char *linestart;
	int asciistart;
	char asterisk = ' ';
	int x_scmp(char *s1, char *s2);
	void x_scpy(char *s1, char *s2);

	prevbuf[0] = '\0';

	/*
	 * Align the start address down to a 4-byte boundary.  The remainder
	 * is taken on an unsigned value: a signed (int) cast of the pointer
	 * can be negative, which makes '%' negative and would move saddr
	 * forward instead of back.
	 */
	i = (int) ((size_t) saddr % 4);
	if (i != 0)
		saddr = saddr - i;

	for (i = 0; i < count;) {
		/* start from a blank line */
		for (j = 0; j < LINESZ; j++)
			linebuf[j] = ' ';

		linestart = saddr;
		asciistart = ASCIISTRT;
		for (j = 0; j < HEXEND;) {
			/* one group: 4 bytes -> 8 hex digits + 4 ascii chars */
			for (k = 0; k < 4; k++) {
				c = *(saddr++) & 0xFF;
				if ((c >= 0x20) && (c <= 0x7e))
					linebuf[asciistart++] = (char) c;
				else
					linebuf[asciistart++] = '.';
				hexdigit = c >> 4;
				linebuf[j++] = hexchar[hexdigit];
				hexdigit = c & 0x0f;
				linebuf[j++] = hexchar[hexdigit];
				i++;
			}
			if (i >= count)
				break;
			linebuf[j++] = ' ';
		}
		linebuf[LINESZ] = '\0';

		if ((x_scmp(linebuf, prevbuf) == 0) && (i < count)) {
			/* same as the previous line: print one marker only */
			if (asterisk == ' ') {
				asterisk = '*';
				DBG_TRACE((" *\n"))
			}
		} else {
			/* %p, not %x: the argument is a pointer */
			DBG_TRACE((" %p %s\n", (void *) linestart, linebuf))
			asterisk = ' ';
			x_scpy(prevbuf, linebuf);
		}
	}

	return (0);
}
/*
 *	x_scmp()
 *
 * equality test for two NUL-terminated strings:
 * returns 0 when they are identical, -1 otherwise.
 */
int x_scmp(char *s1, char *s2)
{
	return (strcmp(s1, s2) == 0) ? 0 : -1;
}
/*
 *	x_scpy()
 *
 * copy the NUL-terminated string s2, terminator included, to s1.
 * Bytes of s1 beyond the copied terminator are left untouched.
 */
void x_scpy(char *s1, char *s2)
{
	char ch;

	do {
		ch = *s2++;
		*s1++ = ch;
	} while (ch != '\0');
}
/*
 *	prtdesc()
 *
 * trace a one-line description of the redopage type found in the log
 * record descriptor ld.  Types not listed produce no output.
 *
 * returns 0.
 */
int prtdesc(struct lrd *ld)
{
	switch (ld->log.redopage.type) {
	case LOG_XTREE:
		DBG_TRACE((" REDOPAGE:XTREE\n "))
		break;
	case (LOG_XTREE | LOG_NEW):
		DBG_TRACE((" REDOPAGE:XTREE_NEW\n "))
		break;
	case (LOG_BTROOT | LOG_XTREE):
		DBG_TRACE((" REDOPAGE:BTROOT_XTREE\n "))
		break;
	case LOG_DTREE:
		DBG_TRACE((" REDOPAGE:DTREE\n "))
		break;
	case (LOG_DTREE | LOG_NEW):
		DBG_TRACE((" REDOPAGE:DTREE_NEW \n "))
		break;
	case (LOG_DTREE | LOG_EXTEND):
		DBG_TRACE((" REDOPAGE:DTREE_EXTEND\n "))
		break;
	case (LOG_BTROOT | LOG_DTREE):
		DBG_TRACE((" REDOPAGE:BTROOT_DTREE\n "))
		break;
	case (LOG_BTROOT | LOG_DTREE | LOG_NEW):
		DBG_TRACE((" REDOPAGE:BTROOT_DTREE.NEW\n "))
		break;
	case LOG_INODE:
		/*
		 * logredo() updates imap for alloc of inode.
		 */
		DBG_TRACE((" REDOPAGE:INODE\n "))
		break;
	case LOG_EA:
		DBG_TRACE((" REDOPAGE:EA\n "))
		break;
	case LOG_DATA:
		DBG_TRACE((" REDOPAGE:DATA\n "))
		break;
	default:
		/* unknown or combined type: nothing to trace */
		break;
	}

	return (0);
}
#endif /* _JFS_DEBUG */