# # WARNING . WARNING . WARNING . WARNING # # EXPERIMENTAL, FILESYSTEM FRYING, INCOMPLETE, NAIVE, WRONG implementation. # #-------------------------------------------------------------------------------------------------- # # No SPLIT/MERGE of btree <=> No node alloc/dealloc # # No EXTENTS OVERFLOW <=> No extents btree # # Don't care about attribute or startup files. # # Basic allocation algo => First found, 1 chunk only, byte level in alloc bitmap. # and we don't use/update nextAllocation hints. # # block_size == nodeSize or DIE. # # HFS+ wrapped in HFS ONLY # # Crude Unicode approximation (utf16_be) # Don't care about encodings (so what ?) # Don't decompose (DANGEROUS, NO CHECK, NO WARNINGS) # Use python's lowercase conversion and compare (DANGEROUS, NO CHECK, NO WARNINGS) # BETTER USE 'pure 7 bit ASCII' IN FILE NAMES. # # Fear : (thread_file_node.idx == file_node.idx) => PANIC. Idem for folder_node. # # # dev_path = '/dev/discs/disc1/part3' import struct, time, codecs, sys, os, stat from string import printable, join from hfs_volumes import * # FIXME : not really what we need : we should use 'fully decomposed form'. That's not what utf-16-be gives us. # we don't care for read and compare. # It'll be VERY BAD when writing file/folder names. # Using python string compare is VERY BAD (may not be the same as Apple's fast compare => different key order !!!) # (utf_encode, utf_decode, sr, sw) = codecs.lookup('utf-16-be') def hexdump(str): count = 0 gra = '' for c in str: print "%02X" % (ord(c)), if ord(c)>=32: gra = gra + c else: gra = gra + '.' count += 1 if (count == 16): print '\t', gra gra = '' count = 0 print ' ' * (16-count), '\t', gra class BTNodeDescriptor: def __init__(self): self.format = '>LLbBH' self.size = struct.calcsize(self.format) def from_string(self, str): (self.fLink, self.bLink, self.kind, self.height, self.numRecords) = struct.unpack(self.format, str[:self.size]) class BTHeaderRec: def __init__(self): self.format = '>HIIIIHHII2xIBxI' self.size = struct.calcsize(self.format) def from_string(self, str): (self.treeDepth, self.rootNode, self.leafRecords, self.firstLeafNode, self.lastLeafNode, self.nodeSize, self.maxKeyLength, self.totalNodes, self.freeNodes, self.clumpSize, self.btreeType, self.attributes) = struct.unpack(self.format, str[:self.size]) def to_string(self): return struct.pack(self.format, self.treeDepth, self.rootNode, self.leafRecords, self.firstLeafNode, self.lastLeafNode, self.nodeSize, self.maxKeyLength, self.totalNodes, self.freeNodes, self.clumpSize, self.btreeType, self.attributes) # # A wonderfull Apple BTree Thinguie. # class Tree: def __init__(self, file): self.file = file self.block0 = self.file.read_block(0, 1) self.nodeDesc = BTNodeDescriptor() self.nodeDesc.from_string(self.block0) self.header = BTHeaderRec() self.header.from_string(self.block0[14:14+self.header.size]) self.nodeSize = self.header.nodeSize if (self.nodeDesc.kind != 1) or (self.nodeDesc.numRecords != 3): raise 'Header node stinks' if self.nodeSize != self.file.volume.blockSize: raise 'FIXME: self.nodeSize != self.file.volume.blockSize' self.root_node = TreeNode(self, self.header.rootNode) def update(self): # For now, only header may have changed self.block0[:14] + self.header.to_string() + self.block0[14+self.header.size:] self.file.write_block(0, self.block0) def read_node(self, idx): # FIXME : assume nodeSize == blockSize return self.file.read_block(idx, 1) def write_node(self, idx, data): # FIXME : assume nodeSize == blockSize return self.file.write_block(idx, data) def find(self, path): folderID = 2 # root folder path = path.split('/') for elem in path: (node, rec) = self.root_node.find(folderID, elem) if rec==None: break rec.decode() if rec.is_folder(): folderID=rec.folderID elif rec.is_file_thread() or rec.is_folder_thread(): raise 'folder or file thread found while searching in a path ?' else: # it's a file. Or an alien artifact. if elem != path[-1]: raise '%s is a file, just in the middle of the path.' % (elem) return (node,rec) class TreeNode: def __init__(self, tree, idx, data=None): self.tree = tree self.idx = idx self.size = self.tree.nodeSize if not data: self.data = self.tree.read_node(idx) else: self.data = data self.node_desc = BTNodeDescriptor() self.node_desc.from_string(self.data) def update(self): self.tree.write_node(self.idx, self.data) def dump(self): print "Node #%d dump----------------" % (self.idx) print "\tSize: %d\tKind %d\tHeight %d\tNbRec %d" % (self.tree.nodeSize, self.node_desc.kind, self.node_desc.height, self.node_desc.numRecords) print "\tf link: %d\tb link: %d" % (self.node_desc.fLink, self.node_desc.bLink) for i in range(self.node_desc.numRecords): tmp = self.get_record(i) (addr, of7) = self.get_record_addr_and_offset(i) print "\tRecord %02d\t addr %04d of7 %04d\t%02d:%-40s" % (i, addr, of7, tmp.keyID, tmp.nodeName.encode('latin-1', 'replace')), if tmp.node_index: print '\t index to %d' % (tmp.node_index) else: print '\t is leaf.' def get_record_addr(self, idx): if idx>self.node_desc.numRecords: raise 'Trying to get addr of record %d on a node with %d records.' % (idx, self.node_desc.numRecords) return self.size-(2*idx+2) def get_record_addr_and_offset(self, idx): addr = self.get_record_addr(idx) (of7, ) = struct.unpack( '>H', self.data[addr:addr+2]) return (addr,of7) def get_record(self, idx): if idx>=self.node_desc.numRecords: raise 'Trying to access record %d on a node with %d records.' % (idx, self.node_desc.numRecords) (ofn, of7) = struct.unpack( '>HH', self.data[self.size-(2*idx+4):self.size-(2*idx)]) # print "get_record: idx %d found at of7 %d len %d" % (idx, of7, ofn-of7) if self.node_desc.kind == -1: # leaf node return LeafRecord(idx, self.data[of7:ofn]) if self.node_desc.kind == 0: # index node return IndexRecord(idx, self.data[of7:ofn]) raise 'Oups. Bad record kind (%d)' % (self.node_desc.kind) def put_record(self, idx, data): if idx>=self.node_desc.numRecords: raise 'Trying to access record %d on a node with %d records.' % (idx, self.node_desc.numRecords) (ofn, of7) = struct.unpack( '>HH', self.data[self.size-(2*idx+4):self.size-(2*idx)]) if (ofn-of7) != len(data): raise 'size of record changed! Unable to put it back.' self.data = self.data[:of7] + data + self.data[ofn:] def find_prev_record(self, id, node_name): prev = None cur_idx = 0 cur = self.get_record(cur_idx) print '\nLooking for %02d:%s' % (id, node_name) while key_comp(id, node_name, cur) != -1: prev = cur cur_idx += 1 if cur_idx == self.node_desc.numRecords: break cur = self.get_record(cur_idx) return(prev) def find(self, id, node_name): prev = self.find_prev_record(id, node_name) if (prev==None): raise 'Boh ?' if self.node_desc.kind == -1: if key_comp(id, node_name, prev) == 0: return (self, prev) else: return (self, None) else: new_node = prev.get_node(self.tree) return new_node.find(id, node_name) def insert(self, record): (last_rec_addr, last_rec_offset) = self.get_record_addr_and_offset(self.node_desc.numRecords) print "Last addr %d, of7 %d" % (last_rec_addr, last_rec_offset) space_free = last_rec_addr-last_rec_offset record_len = len(record.data) print "Insert %d, %d free " % (record_len, space_free) if space_free < record_len: raise "No more space in node. Have to split. Don't want to. Sorry" prev = self.find_prev_record(record.keyID, record.nodeName) (prev_rec_addr, prev_rec_offset) = self.get_record_addr_and_offset(prev.idx) (cur_rec_addr, cur_rec_offset) = self.get_record_addr_and_offset(prev.idx+1) # data from start to cur_rev_offset is before us hdr = self.data[0:10] + struct.pack( '>H', self.node_desc.numRecords+1) + self.data[12:cur_rec_offset] # add our record hdr += record.data # add all that was between the record we replace and the end print "Moving old data from %d-%d to %d" % (cur_rec_offset, last_rec_offset, len(hdr)) hdr += self.data[cur_rec_offset:last_rec_offset] # pad for free space (add 2 byte for new record info) hdr += '\x00' * (space_free-(record_len+2)) # move each record from last to cur down for i in range(prev.idx,self.node_desc.numRecords): idx = (self.node_desc.numRecords+prev.idx)-i (tmp_addr, tmp_of7) = self.get_record_addr_and_offset(idx) print "idx %d was addr %d of7 %d" % (idx, tmp_addr, tmp_of7) tmp_of7+=record_len print " is now of7 %d" % (tmp_of7) hdr += struct.pack( '>H', tmp_of7) # remaining records (with us) have not changed hdr += self.data[cur_rec_addr:self.size] if len(hdr) != self.size: raise "There's something wicked in this new record !!!!" # Here, we need to update the number of records if self.node_desc.kind == -1: # It's a leaf node, update the number of leaf records in the tree header. self.tree.header.leafRecords += 1 new_node = TreeNode(self.tree, self.idx, hdr) return new_node def key_comp(id, name, rec): if id < rec.keyID: return -1 if id > rec.keyID: return 1 name = name.lower() rec_name = rec.nodeName.lower() if name < rec_name: return -1 if name > rec_name: return 1 return 0 class IndexRecord: def __init__(self, idx, data): self.idx = idx self.data = data (self.keyLength, self.keyID, self.nodeNameLen) = struct.unpack( '>HIH', self.data[0:8]) raw_name = self.data[8:8+2*self.nodeNameLen] (self.nodeName, dummy) = utf_decode(raw_name) (self.node_index, ) = struct.unpack( '>L', self.data[8+2*self.nodeNameLen:8+2*self.nodeNameLen+4]) # return node pointed-by def get_node(self, tree): return TreeNode (tree, self.node_index) class LeafRecord: def __init__(self, idx=-1, data=None): self.idx = idx self.data = data self.node_index = -1 self.nodeName='' self.nodeNameLen=0 self.recordType = 0 self.parentID = 0 self.fileID = 0 self.keyID = 0 self.base_decode() def base_decode(self): if self.data: (self.keyLength, self.keyID, self.nodeNameLen) = struct.unpack( '>HIH', self.data[0:8]) (self.nodeName, dummy) = utf_decode(self.data[8:8+self.keyLength-6]) self.data_start = 8+self.keyLength-6 (self.recordType, ) = struct.unpack( '>H', self.data[self.data_start:self.data_start+2]) def build_file(self, file_ID, parent_ID, nodeName, dataFork, rsrcFork, date): (utf_name, utf_name_len) = utf_encode(nodeName) key_len = 6+len(utf_name) head = struct.pack( '>HLH', key_len, parent_ID, utf_name_len) + utf_name record = struct.pack( '>hH4xIIIIII16xII32x', 2, 2, file_ID, date, date , 0, 0, 0 , 0x4D504733, 0x686F6F6B) self.data = head + record + dataFork.to_string() + rsrcFork.to_string() self.base_decode() self.decode() def build_file_thread(self, file_ID, parent_ID, nodeName): (utf_name, utf_name_len) = utf_encode(nodeName) head = struct.pack( '>HLH', 6, file_ID, 0) record = struct.pack( '>hhLH', 4, 0, parent_ID, utf_name_len) + utf_name self.data = head + record self.base_decode() self.decode() def dump(self): name = ['Raw', 'Folder', 'File', 'FolderThread', 'FileThread'] print "Dumping LeafRecord. Type:%-16sDataStart %d" % (name[self.recordType], self.data_start) print "\tKEY: %02d:%s\tnodeIndex %02d" % (self.keyID, self.nodeName, self.node_index) if self.is_file(): print "DATA" tmp = HFSPlusForkData(None) tmp.from_string(self.get_data_fork()) tmp.dump() print "RSRC" tmp.from_string(self.get_rsrc_fork()) tmp.dump() if self.is_file_thread(): print "\tThread record for %s, fileID=%d, parentID=%d" % (self.elemName, self.fileID, self.parentID) def is_folder(self): return (self.recordType==0x0001) def is_file(self): return (self.recordType==0x0002) def is_folder_thread(self): return (self.recordType==0x0003) def is_file_thread(self): return (self.recordType==0x0004) def decode(self): if self.is_folder(): self.decodeFolder() elif self.is_file(): self.decodeFile() elif self.is_folder_thread(): self.decodeFolderThread() elif self.is_file_thread(): self.decodeFileThread() else: raise 'Unknown rec type %d' % (self.recordType) def decodeFolder(self): self.format = '>hHIIIIIII' self.size = struct.calcsize(self.format) self.parentID = self.keyID (self.recordType, self.flags, self.valence, self.folderID, self.createDate, self.contentModDate, self.attributeModDate, self.accessDate, self.backupDate) = struct.unpack(self.format, self.data[self.data_start:self.data_start+self.size]) def updateFolder(self, valence, contentModDate): self.valence = valence self.contentModDate = contentModDate self.data = self.data[:self.data_start] + struct.pack(self.format, self.recordType, self.flags, self.valence, self.folderID, self.createDate, self.contentModDate, self.attributeModDate, self.accessDate, self.backupDate) + self.data[self.data_start+self.size:] def decodeFile(self): self.format = '>hH4xIIIIII' self.size = struct.calcsize(self.format) self.parentID = self.keyID (self.recordType, self.flags, self.fileID, self.createDate, self.contentModDate, self.attributeModDate, self.accessDate, self.backupDate) = struct.unpack(self.format, self.data[self.data_start:self.data_start+self.size]) # offset calculation is strange # And what to put there, when writing ? self.fork_offset = 16 + 14 + 10 + 4 + 4 + 8 + self.data_start + self.size def decodeFileThread(self): self.format = '>h2xLH' self.size = struct.calcsize(self.format) self.fileID = self.keyID (self.recordType, self.parentID, elemName_len) = struct.unpack(self.format, self.data[self.data_start:self.data_start+self.size]) raw_name = self.data[self.data_start+self.size:self.data_start+self.size+elemName_len*2] (self.elemName, dummy) = utf_decode(raw_name) def get_data_fork(self): return self.data[self.fork_offset:self.fork_offset+80] def get_rsrc_fork(self): return self.data[self.fork_offset+80:self.fork_offset+160] device=open(dev_path, 'r+b') hfs_volume = HFSVolume() hfs_volume.read(device, 2*512) (hfsp_offset, hfsp_len) = hfs_volume.unwrap() print 'Found wrapped HFS+ at offset %08X, len %03.3f M' % (hfsp_offset, hfsp_len/(1024.0*1024.0)) hfsp_volume = HFSPlusVolume(hfsp_offset, hfsp_len) hfsp_volume.read(device) hfsp_volume.info() catalog_file_fork = HFSPlusForkData(hfsp_volume) catalog_file_fork.from_string(hfsp_volume.catalogFile) catalog_tree = Tree(catalog_file_fork) # # Well. That's good # Let's find folders/files in this mess, now. # # We start with a Parent_ID, and a name. # Search /find it. # It's a folder : we have a new parent id, to get down in the path. # It's a file : got it !!! # file_name = sys.argv[1] dir_path = sys.argv[2] file_size = os.stat(file_name)[stat.ST_SIZE] file_src = open(file_name, 'rb') print 'Copy %s to %s. File size is %d' % (file_name, dir_path, file_size) # find the folder (folder_node, folder_rec) = catalog_tree.find(dir_path) # find the file in folder (file_node, file_rec) = catalog_tree.root_node.find(folder_rec.folderID, file_name) if file_rec: raise "'%s' exists in '%s' Abort !" % (file_name, dir_path) print "Gonna insert %02d:%s in:" % (folder_rec.folderID, file_name) file_node.dump() #print "Next node is :" #next_node = TreeNode(catalog_tree, file_node.node_desc.fLink) #next_node.dump() # # Build a new file record payload # # That is : alloc some blocks. # Mark it. # Update alloc bitmap. # # Get a fileID. # Update volume header (new file id, dates, nbfiles, size, etc...). # (fileID, fileFork, mod_date) = hfsp_volume.alloc_file(file_size) # find the fileid in thread records. print 'Going to search for the thread id of new fileid=%d' % (fileID) (thread_file_node, thread_file_rec) = catalog_tree.root_node.find(fileID, '') if thread_file_rec: thread_file_rec.dump() raise 'Oups! There is already a fileid=%d in the thread records.... (it stinks)' % (fileID) print "Gonna insert %02d:%s in:" % (fileID, '') thread_file_node.dump() #FIXME : can this be ? How to do it, then ? if (thread_file_node.idx == file_node.idx) or (thread_file_node.idx == folder_node.idx) or (folder_node.idx == file_node.idx): raise "We're going to feel miserable : trying to update two separate copies of the same block: folder, file, thread = %d,%d,%d..." % (folder_node.idx, file_node.idx, thread_file_node) # empty rsrc fork rsrcFork = HFSPlusForkData(hfsp_volume) rsrcFork.from_string('\x00' * rsrcFork.size) file_rec = LeafRecord() file_rec.build_file(fileID, folder_rec.folderID, file_name, fileFork, rsrcFork, mod_date) # Look at this shinny new record... file_rec.dump() # Then insert it in btree # # This returns a new node !!! # The old one is unchanged. new_node = file_node.insert(file_rec) # is it good ? new_node.dump() # # What about the file thread ?????? thread_rec = LeafRecord() thread_rec.build_file_thread(fileID, folder_rec.folderID, file_name) # Look at this shinny new record... thread_rec.dump() new_thread_node = thread_file_node.insert(thread_rec) # is it good ? new_thread_node .dump() # # Time to update the folder # folder_rec.updateFolder(folder_rec.valence+1, mod_date) folder_node.put_record(folder_rec.idx, folder_rec.data) # # # we should put some real data in this file.... # # idx=0 while 1: block = file_src.read(fileFork.volume.blockSize) if not block: break fileFork.write_block(idx, block) if (idx % 10)==0: print "# %05d " % (idx), if (idx % 100)==0: print sys.__stdout__.flush() idx = idx +1 print 'end...' # And update volume hfsp_volume.update() # And update tree catalog_tree.update() # And update file node new_node.update() # And update thread file node new_thread_node.update() # And update folder node folder_node.update() # And STOP (lotsa data may be cached, and wrong). device.close() # And Smile