Big update from the ruby-ole trunk

git-svn-id: file:///home/svn/framework3/trunk@6522 4d416f70-5f16-0410-b530-b9f4589650da
This commit is contained in:
HD Moore 2009-05-05 20:31:41 +00:00
parent 4bafe57fe3
commit 43a03aa307
11 changed files with 2368 additions and 1150 deletions

View File

@ -1,5 +1,7 @@
# NOTE(review): this is the pre-refactor form shown by the diff - the require
# is issued from inside the module body; the replacement hunk hoists it to
# file scope before the module is opened.
module Ole # :nodoc:
require 'ole/support'
Log = Logger.new_with_callstack
end
# ole/support must be loaded first: it provides Logger.new_with_callstack,
# used immediately below.
require 'ole/support'
module Ole # :nodoc:
# shared logger instance for the whole Ole module
Log = Logger.new_with_callstack
end

View File

@ -1,169 +1,2 @@
=begin
full file_system module
will be available and recommended usage, allowing Ole::Storage, Dir, and Zip::ZipFile to be
used pretty interchangeably down the track. it should be possible to write a recursive copy using
the plain api, such that you can copy dirs/files agnostically between any of ole docs, dirs,
and zip files.
i think it's okay to have an api like this on top, but there are certain things that ole
does that aren't captured.
ole::storage can have multiple files with the same name, for example, or with / in the
name, and other things that are probably invalid anyway.
i think this should remain an addon, built on top of my core api.
but still the ideas can be reflected in the core, ie, changing the read/write semantics.
once the core changes are complete, this will be a pretty straight forward file to complete.
=end
module Ole
  class Storage
    # Returns a File-like proxy (FileParent) over the streams in this document.
    def file
      @file ||= FileParent.new self
    end
    # Returns a Dir-like proxy (DirParent) over the directories in this document.
    def dir
      @dir ||= DirParent.new self
    end
    # Resolve +path_str+ (eg "dir1/dir2/file") to its Dirent. Leading and
    # trailing slashes are ignored; the empty path is the root. Raises if a
    # component is missing, or a non-final component is a file.
    def dirent_from_path path_str
      path = path_str.sub(/^\/*/, '').sub(/\/*$/, '')
      dirent = @root
      return dirent if path.empty?
      path = path.split /\/+/
      until path.empty?
        raise "invalid path #{path_str.inspect}" if dirent.file?
        if tmp = dirent[path.shift]
          dirent = tmp
        else
          # allow write etc later.
          raise "invalid path #{path_str.inspect}"
        end
      end
      dirent
    end
    # Proxy for a subset of the ::File class-method interface, operating on
    # the streams inside an ole document.
    class FileParent
      def initialize ole
        @ole = ole
      end
      # Open the stream at +path_str+; yields the io if a block is given,
      # otherwise returns it.
      def open path_str, mode='r'
        dirent = @ole.dirent_from_path path_str
        # like Errno::EISDIR
        raise "#{path_str.inspect} is a directory" unless dirent.file?
        io = dirent.io
        if block_given?
          yield io
        else
          io
        end
      end
      alias new :open
      # slurp the whole stream at +path+
      def read path
        open(path) { |f| f.read }
      end
      # crappy copy from Dir.
      def unlink path
        dirent = @ole.dirent_from_path path
        # EPERM
        raise "operation not permitted #{path.inspect}" unless dirent.file?
        # i think we should free all of our blocks. i think the best way to do that would be
        # like:
        # open(path) { |f| f.truncate 0 }. which should free all our blocks from the
        # allocation table. then if we remove ourself from our parent, we won't be part of
        # the bat at save time.
        # i think if you run repack, all free blocks should get zeroed.
        parent = @ole.dirent_from_path(('/' + path).sub(/\/[^\/]+$/, ''))
        parent.children.delete dirent
        1 # hmmm. as per ::File ?
      end
    end
    # Proxy for a subset of the ::Dir class-method interface.
    class DirParent
      def initialize ole
        @ole = ole
      end
      # Open the directory at +path_str+; yields the Dir if a block is given,
      # otherwise returns it.
      def open path_str
        dirent = @ole.dirent_from_path path_str
        # like Errno::ENOTDIR
        raise "#{path_str.inspect} is not a directory" unless dirent.dir?
        dir = Dir.new dirent, path_str
        if block_given?
          yield dir
        else
          dir
        end
      end
      # certain Dir class methods proxy in this fashion:
      def entries path
        open(path) { |dir| dir.entries }
      end
      # there are some other important ones, like:
      # chroot (!), mkdir, chdir, rmdir, glob etc etc. for now, i think
      # mkdir, and rmdir are the main ones we'd need to support
      def rmdir path
        dirent = @ole.dirent_from_path path
        # repeating myself
        raise "#{path.inspect} is not a directory" unless dirent.dir?
        # ENOTEMPTY:
        raise "directory not empty #{path.inspect}" unless dirent.children.empty?
        # now delete it, how to do that? the canonical representation that is
        # maintained is the root tree, and the children array. we must remove it
        # from the children array.
        # we need the parent then.
        # FIX: this previously read `path.sub(/\/[^\/]+$/, '') || '/'`. the `||`
        # bound to the result of +sub+, which is never nil, so the '/' fallback
        # was dead code, and a top-level path like "foo" (nothing to strip)
        # resolved to the dirent itself rather than to its parent. compute the
        # parent path the same way FileParent#unlink does.
        parent = @ole.dirent_from_path(('/' + path).sub(/\/[^\/]+$/, ''))
        # note that the way this currently works, on save and repack time this will get
        # reflected. to work properly, ie to make a difference now it would have to re-write
        # the dirent. i think that Ole::Storage#close will handle that. and maybe include a
        # #repack.
        parent.children.delete dirent
        0 # hmmm. as per ::Dir ?
      end
      # An object representing an open ole directory, with a subset of the
      # ::Dir instance interface (each, read, seek/tell, rewind).
      class Dir
        include Enumerable
        attr_reader :dirent, :path, :entries, :pos
        def initialize dirent, path
          @dirent, @path = dirent, path
          @pos = 0
          # FIXME: hack, and probably not really desired
          @entries = %w[. ..] + @dirent.children.map(&:name)
        end
        def each(&block)
          @entries.each(&block)
        end
        def close
        end
        # return the entry at the current position, then advance (nil at end)
        def read
          @entries[@pos]
        ensure
          @pos += 1 if @pos < @entries.length
        end
        # clamp the new position into 0..entries.length
        def pos= pos
          @pos = [[0, pos].max, @entries.length].min
        end
        def rewind
          @pos = 0
        end
        alias tell :pos
        alias seek :pos=
      end
    end
  end
end
# keeping this file around for now, but will delete later on...
require 'ole/storage/file_system'

231
lib/ole/ranges_io.rb Normal file
View File

@ -0,0 +1,231 @@
# need IO::Mode
require 'ole/support'
#
# = Introduction
#
# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
# slices of the input file by providing a list of ranges. Intended as an initial measure to curb
# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
# no method to stream it.
#
# This class will encapuslate the ranges (corresponding to big or small blocks) of any ole file
# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
# getting 16 bytes doesn't read the whole thing).
#
# In the simplest case it can be used with a single range to provide a limited io to a section of
# a file.
#
# = Limitations
#
# * No buffering. by design at the moment. Intended for large reads
#
# = TODO
#
# On further reflection, this class is something of a joining/optimization of
# two separate IO classes. a SubfileIO, for providing access to a range within
# a File as a separate IO object, and a ConcatIO, allowing the presentation of
# a bunch of io objects as a single unified whole.
#
# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
# convert a whole mime message into an IO stream, that can be read from.
# It will just be the concatenation of a series of IO objects, corresponding to
# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
# original message proper, or RangesIO as provided by the Attachment#data, that
# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
# fly. Thus the attachment, in its plain or encoded form, and the message as a
# whole never exists as a single string in memory, as it does now. This is a
# fair bit of work to achieve, but generally useful I believe.
#
# This class isn't ole specific, maybe move it to my general ruby stream project.
#
class RangesIO
  attr_reader :io, :mode, :ranges, :size, :pos
  # +io+:: the parent io object that we are wrapping.
  # +mode+:: the mode to use
  # +params+:: hash of params.
  # * :ranges - byte offsets, either:
  #   1. an array of ranges [1..2, 4..5, 6..8] or
  #   2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
  #   (think the way String indexing works)
  # * :close_parent - boolean to close parent when this object is closed
  #
  # NOTE: the +ranges+ can overlap.
  def initialize io, mode='r', params={}
    mode, params = 'r', mode if Hash === mode
    ranges = params[:ranges]
    @params = {:close_parent => false}.merge params
    @mode = IO::Mode.new mode
    @io = io
    # convert ranges to arrays. check for negative ranges?
    # FIX: the default used to be `[0, io.size]` - a single bare pair rather
    # than an array of [pos, len] pairs, which made the size calculation below
    # crash when destructuring. it must be a list containing one pair.
    ranges ||= [[0, io.size]]
    @ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
    # calculate size
    @size = @ranges.inject(0) { |total, (pos, len)| total + len }
    # initial position in the file
    @pos = 0
    # handle some mode flags
    truncate 0 if @mode.truncate?
    seek size if @mode.append?
  end
  #IOError: closed stream
  # get this for reading, writing, everything...
  #IOError: not opened for writing
  # add block form. TODO add test for this
  def self.open(*args, &block)
    ranges_io = new(*args)
    if block_given?
      begin; yield ranges_io
      ensure; ranges_io.close
      end
    else
      ranges_io
    end
  end
  # Seek within the virtual (concatenated) stream. Callable with a +whence+
  # via the #seek alias.
  def pos= pos, whence=IO::SEEK_SET
    case whence
    when IO::SEEK_SET
    when IO::SEEK_CUR
      pos += @pos
    when IO::SEEK_END
      pos = @size + pos
    else raise Errno::EINVAL
    end
    # FIX: allow seeking to end-of-file (pos == size). the previous exclusive
    # range made `seek size` raise Errno::EINVAL, which broke append modes -
    # #initialize seeks to +size+ when the mode is append, and #eof? itself is
    # defined as pos == size.
    raise Errno::EINVAL unless (0..@size) === pos
    @pos = pos
  end
  alias seek :pos=
  alias tell :pos
  def close
    @io.close if @params[:close_parent]
  end
  # returns the [+offset+, +size+], pair inorder to read/write at +pos+
  # (like a partial range), and its index.
  def offset_and_size pos
    total = 0
    ranges.each_with_index do |(offset, size), i|
      if pos <= total + size
        diff = pos - total
        return [offset + diff, size - diff], i
      end
      total += size
    end
    # should be impossible for any valid pos, (0...size) === pos
    raise ArgumentError, "no range for pos #{pos.inspect}"
  end
  def eof?
    @pos == @size
  end
  # read bytes from file, to a maximum of +limit+, or all available if unspecified.
  def read limit=nil
    data = ''
    return data if eof?
    limit ||= size
    partial_range, i = offset_and_size @pos
    # this may be conceptually nice (create sub-range starting where we are), but
    # for a large range array its pretty wasteful. even the previous way was. but
    # i'm not trying to optimize this atm. it may even go to c later if necessary.
    ([partial_range] + ranges[i+1..-1]).each do |pos, len|
      @io.seek pos
      if limit < len
        # convoluted, to handle read errors. s may be nil
        s = @io.read limit
        @pos += s.length if s
        break data << s
      end
      # convoluted, to handle ranges beyond the size of the file
      s = @io.read len
      @pos += s.length if s
      data << s
      break if s.length != len
      limit -= len
    end
    data
  end
  # you may override this call to update @ranges and @size, if applicable.
  def truncate size
    raise NotImplementedError, 'truncate not supported'
  end
  # using explicit forward instead of an alias now for overriding.
  # should override truncate.
  def size= size
    truncate size
  end
  # Write +data+ at the current position, spanning ranges as needed; grows
  # the stream via #truncate (if a subclass implements it) when out of room.
  # Returns the number of bytes written.
  def write data
    # short cut. needed because truncate 0 may return no ranges, instead of empty range,
    # thus offset_and_size fails.
    return 0 if data.empty?
    data_pos = 0
    # if we don't have room, we can use the truncate hook to make more space.
    if data.length > @size - @pos
      begin
        truncate @pos + data.length
      rescue NotImplementedError
        raise IOError, "unable to grow #{inspect} to write #{data.length} bytes"
      end
    end
    partial_range, i = offset_and_size @pos
    ([partial_range] + ranges[i+1..-1]).each do |pos, len|
      @io.seek pos
      if data_pos + len > data.length
        chunk = data[data_pos..-1]
        @io.write chunk
        @pos += chunk.length
        data_pos = data.length
        break
      end
      @io.write data[data_pos, len]
      @pos += len
      data_pos += len
    end
    data_pos
  end
  alias << write
  # i can wrap it in a buffered io stream that
  # provides gets, and appropriately handle pos,
  # truncate. mostly added just to past the tests.
  # FIXME
  def gets
    s = read 1024
    i = s.index "\n"
    # FIX: a chunk without a newline used to crash with NoMethodError on nil
    # (`i + 1`). behave like IO#gets instead: return what we have, or nil at
    # end of data.
    if i.nil?
      return s.empty? ? nil : s
    end
    @pos -= s.length - (i + 1)
    s[0..i]
  end
  alias readline :gets
  def inspect
    # the rescue is for empty files
    pos, len = (@ranges[offset_and_size(@pos).last] rescue [nil, nil])
    range_str = pos ? "#{pos}..#{pos+len}" : 'nil'
    "#<#{self.class} io=#{io.inspect}, size=#@size, pos=#@pos, "\
      "range=#{range_str}>"
  end
end
# this subclass of ranges io explicitly ignores the truncate part of 'w' modes.
# only really needed for the allocation table writes etc. maybe just use explicit modes
# for those
# better yet write a test that breaks before I fix it. added nodoc for the
# time being.
class RangesIONonResizeable < RangesIO # :nodoc:
  def initialize io, mode='r', params={}
    if Hash === mode
      params = mode
      mode = 'r'
    end
    # mask the truncate bit out of the requested mode's flags, so a 'w'
    # style open cannot wipe the ranges
    stripped_flags = IO::Mode.new(mode).flags & ~IO::TRUNC
    super io, stripped_flags, params
  end
end

View File

@ -1,934 +1,3 @@
#! /usr/bin/ruby -w
require 'iconv'
require 'date'
require 'stringio'
require 'tempfile'
require 'ole/base'
require 'ole/types'
require 'ole/io_helpers'
module Ole # :nodoc:
#
# = Introduction
#
# <tt>Ole::Storage</tt> is a simple class intended to abstract away details of the
# access to OLE2 structured storage files, such as those produced by
# Microsoft Office, eg *.doc, *.msg etc.
#
# Initially based on chicago's libole, source available at
# http://prdownloads.sf.net/chicago/ole.tgz
# Later augmented with some corrections by inspecting pole, and (purely
# for header definitions) gsf.
#
# = Usage
#
# Usage should be fairly straight forward:
#
# # get the parent ole storage object
# ole = Ole::Storage.open 'myfile.msg', 'r+'
# # => #<Ole::Storage io=#<File:myfile.msg> root=#<Dirent:"Root Entry">>
# # read some data
# ole.root[1].read 4
# # => "\001\000\376\377"
# # get the top level root object and output a tree structure for
# # debugging
# puts ole.root.to_tree
# # =>
# - #<Dirent:"Root Entry" size=3840 time="2006-11-03T00:52:53Z">
# |- #<Dirent:"__nameid_version1.0" size=0 time="2006-11-03T00:52:53Z">
# | |- #<Dirent:"__substg1.0_00020102" size=16 data="CCAGAAAAAADAAA...">
# ...
# |- #<Dirent:"__substg1.0_8002001E" size=4 data="MTEuMA==">
# |- #<Dirent:"__properties_version1.0" size=800 data="AAAAAAAAAAABAA...">
# \- #<Dirent:"__recip_version1.0_#00000000" size=0 time="2006-11-03T00:52:53Z">
# |- #<Dirent:"__substg1.0_0FF60102" size=4 data="AAAAAA==">
# ...
# # write some data, and finish up (note that open is 'r+', so this overwrites
# # but doesn't truncate)
# ole.root["\001CompObj"].open { |f| f.write "blah blah" }
# ole.close
#
# = TODO
#
# 1. tests. lock down how things work at the moment - mostly good.
# create from scratch works now, as does copying in a subtree of another doc, so
# ole embedded attachment serialization works now. i can save embedded xls in an msg
into a separate file, and open it. this was a goal. now i would want to implement
# to_mime conversion for embedded attachments, that serializes them to ole, but handles
# some separately like various meta file types as plain .wmf attachments perhaps. this
# will give pretty good .eml's from emails with embedded attachments.
# the other todo is .rtf output, with full support for embedded ole objects...
# 2. lots of tidying up
# - main FIXME's in this regard are:
# * the custom header cruft for Header and Dirent needs some love.
# * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
# and, in a manner of speaking, but arguably different, Storage itself.
# they have differing api's which would be nice to clean.
# AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
# * ole types need work, can't serialize datetime at the moment.
# 3. need to fix META_BAT support in #flush.
#
class Storage
VERSION = '1.1.1'
# The top of the ole tree structure
attr_reader :root
# The tree structure in its original flattened form. only valid after #load, or #flush.
attr_reader :dirents
# The underlying io object to/from which the ole object is serialized, whether we
# should close it, and whether it is writeable
attr_reader :io, :close_parent, :writeable
# Low level internals, you probably shouldn't need to mess with these
# (header record, big/small block allocation tables, small-block backing file)
attr_reader :header, :bbat, :sbat, :sb_file
# maybe include an option hash, and allow :close_parent => true, to be more general.
# +arg+ should be either a file, or an +IO+ object, and needs to be seekable.
# +mode+ may only be given when +arg+ is a filename.
def initialize arg, mode=nil
  # get the io object
  @close_parent, @io = if String === arg
    [true, open(arg, mode || 'rb')]
  else
    raise 'unable to specify mode string with io object' if mode
    [false, arg]
  end
  # do we have this file opened for writing? don't know of a better way to tell
  # (probe: flush raises IOError on an io not opened for writing)
  @writeable = begin
    @io.flush
    true
  rescue IOError
    false
  end
  # silence undefined warning in clear
  @sb_file = nil
  # if the io object has data, we should load it, otherwise start afresh
  if @io.size > 0; load
  else clear
  end
end
# Adds a block form to +new+, like ::File.open: the storage object is
# yielded, then closed when the block returns; the block's value is
# returned. Without a block, simply returns the new object.
def self.new arg, mode=nil
  ole = super
  return ole unless block_given?
  begin
    yield ole
  ensure
    ole.close
  end
end
class << self
# encouraged - reads better with the block form above
alias open :new
# deprecated - kept for backwards compatibility
alias load :new
end
# load document from file.
# Parses the header, builds the big block allocation table from the header
# and meta-bat blocks, reads and tree-ifies the directory entries, and sets
# up the small block file/table.
def load
  # we always read 512 for the header block. if the block size ends up being different,
  # what happens to the 109 fat entries. are there more/less entries?
  @io.rewind
  header_block = @io.read 512
  @header = Header.load header_block
  # create an empty bbat
  @bbat = AllocationTable::Big.new self
  # extra mbat blocks
  mbat_blocks = (0...@header.num_mbat).map { |i| i + @header.mbat_start }
  # the bat chain is the header's spare room plus any meta-bat blocks
  bbat_chain = (header_block[Header::SIZE..-1] + @bbat.read(mbat_blocks)).unpack 'L*'
  # am i using num_bat in the right way?
  @bbat.load @bbat.read(bbat_chain[0, @header.num_bat])
  # get block chain for directories, read it, then split it into chunks and load the
  # directory entries. semantics changed - used to cut at first dir where dir.type == 0
  @dirents = @bbat.read(@header.dirent_start).scan(/.{#{Dirent::SIZE}}/mo).
    map { |str| Dirent.load self, str }.reject { |d| d.type_id == 0 }
  # now reorder from flat into a tree
  # links are stored in some kind of balanced binary tree
  # check that everything is visited at least, and at most once
  # similarly with the blocks of the file.
  # was thinking of moving this to Dirent.to_tree instead.
  # (singleton method on this particular dirents array)
  class << @dirents
    def to_tree idx=0
      return [] if idx == Dirent::EOT
      d = self[idx]
      d.children = to_tree d.child
      raise "directory #{d.inspect} used twice" if d.idx
      d.idx = idx
      to_tree(d.prev) + [d] + to_tree(d.next)
    end
  end
  @root = @dirents.to_tree.first
  Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
  unused = @dirents.reject(&:idx).length
  Log.warn "* #{unused} unused directories" if unused > 0
  # FIXME i don't currently use @header.num_sbat which i should
  # hmm. nor do i write it. it means what exactly again?
  @sb_file = RangesIOResizeable.new @bbat, @root.first_block, @root.size
  @sbat = AllocationTable::Small.new self
  @sbat.load @bbat.read(@header.sbat_start)
end
# Flush pending changes (when writeable), close the small-block backing
# file, and close the parent io if we own it.
def close
  if @writeable
    flush
  end
  @sb_file.close
  if @close_parent
    @io.close
  end
end
# should have a #open_dirent i think. and use it in load and flush. neater.
# also was thinking about Dirent#open_padding. then i can more easily clean up the padding
# to be 0.chr
=begin
thoughts on fixes:
1. reterminate any chain not ending in EOC.
2. pass through all chain heads looking for collisions, and making sure nothing points to them
(ie they are really heads).
3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
in the bat for them.
this stuff will ensure reliability of input better. otherwise, its actually worth doing a repack
directly after read, to ensure the above is probably accounted for, before subsequent writes possibly
destroy things.
=end
# Serialize the in-memory tree back to the underlying io: the flattened
# dirent records first, then the sbat, then the bbat itself (whose blocks
# must be claimed from that same table), and finally the header plus the
# 109-entry meta-bat chain at offset 0.
def flush
  # recreate dirs from our tree, split into dirs and big and small files
  @root.type = :root
  # for now.
  @root.name = 'Root Entry'
  @root.first_block = @sb_file.first_block
  @root.size = @sb_file.size
  @dirents = @root.flatten
  #dirs, files = @dirents.partition(&:dir?)
  #big_files, small_files = files.partition { |file| file.size > @header.threshold }
  # maybe i should move the block form up to RangesIO, and get it for free at all levels.
  # Dirent#open gets block form for free then
  # 1. write out the flattened dirent records, zero-padded to a whole block
  io = RangesIOResizeable.new @bbat, @header.dirent_start
  io.truncate 0
  @dirents.each { |dirent| io.write dirent.save }
  padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
  #p [:padding, padding]
  io.write 0.chr * padding
  @header.dirent_start = io.first_block
  io.close
  # similarly for the sbat data.
  io = RangesIOResizeable.new @bbat, @header.sbat_start
  io.truncate 0
  io.write @sbat.save
  @header.sbat_start = io.first_block
  @header.num_sbat = @bbat.chain(@header.sbat_start).length
  io.close
  # what follows will be slightly more complex for the bat fiddling.
  # create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using
  # truncate. then when its time to write, convert that chain and some chunk of blocks at
  # the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its
  # done.
  # 2. release blocks previously marked bat/meta-bat so they can be reclaimed
  @bbat.table.map! do |b|
    b == AllocationTable::BAT || b == AllocationTable::META_BAT ?
      AllocationTable::AVAIL : b
  end
  io = RangesIOResizeable.new @bbat, AllocationTable::EOC
  # use crappy loop for now:
  # 3. grow the claimed chain until it can hold the bat's own serialized
  # form; growing the chain can itself grow the bat, hence the loop
  while true
    bbat_data = @bbat.save
    #mbat_data = bbat_data.length / @bbat.block_size * 4
    mbat_chain = @bbat.chain io.first_block
    raise NotImplementedError, "don't handle writing out extra META_BAT blocks yet" if mbat_chain.length > 109
    # so we can ignore meta blocks in this calculation:
    break if io.size >= bbat_data.length # it shouldn't be bigger right?
    # this may grow the bbat, depending on existing available blocks
    io.truncate bbat_data.length
  end
  # now extract the info we want:
  ranges = io.ranges
  mbat_chain = @bbat.chain io.first_block
  io.close
  mbat_chain.each { |b| @bbat.table[b] = AllocationTable::BAT }
  @header.num_bat = mbat_chain.length
  #p @bbat.truncated_table
  #p ranges
  #p mbat_chain
  # not resizeable!
  # 4. write the final bat into the blocks claimed above
  io = RangesIO.new @io, ranges
  io.write @bbat.save
  io.close
  mbat_chain += [AllocationTable::AVAIL] * (109 - mbat_chain.length)
  @header.mbat_start = AllocationTable::EOC
  @header.num_mbat = 0
=begin
bbat_data = new_bbat.save
# must exist as linear chain stored in header.
@header.num_bat = (bbat_data.length / new_bbat.block_size.to_f).ceil
base = io.pos / new_bbat.block_size - 1
io.write bbat_data
# now that spanned a number of blocks:
mbat = (0...@header.num_bat).map { |i| i + base }
mbat += [AllocationTable::AVAIL] * (109 - mbat.length) if mbat.length < 109
header_mbat = mbat[0...109]
other_mbat_data = mbat[109..-1].pack 'L*'
@header.mbat_start = base + @header.num_bat
@header.num_mbat = (other_mbat_data.length / new_bbat.block_size.to_f).ceil
io.write other_mbat_data
=end
  @root.type = :dir
  # now seek back and write the header out
  @io.seek 0
  @io.write @header.save + mbat_chain.pack('L*')
  @io.flush
end
# Reset to an empty ole document, discarding any existing content in the
# backing io (equivalent to loading an empty document).
def clear
  # first step though is to support modifying pre-existing and saving, then this
  # missing gap will be fairly straight forward - essentially initialize to
  # equivalent of loading an empty ole document.
  #raise NotImplementedError, 'unable to create new ole objects from scratch as yet'
  Log.warn 'creating new ole storage object on non-writable io' unless @writeable
  @header = Header.new
  @bbat = AllocationTable::Big.new self
  @root = Dirent.new self, :dir
  @root.name = 'Root Entry'
  @dirents = [@root]
  @root.idx = 0
  @root.children = []
  # size shouldn't display for non-files
  @root.size = 0
  # replace any previous small-block file with an empty one
  @sb_file.close if @sb_file
  @sb_file = RangesIOResizeable.new @bbat, AllocationTable::EOC
  @sbat = AllocationTable::Small.new self
  # throw everything else the hell away
  @io.truncate 0
end
# could be useful with mis-behaving ole documents. or to just clean them up.
# +temp+ selects the scratch backing: :file (Tempfile) or :mem (StringIO).
def repack temp=:file
  helper = method :repack_using_io
  if temp == :file
    Tempfile.open 'w+', &helper
  elsif temp == :mem
    StringIO.open(&helper)
  else
    raise "unknown temp backing #{temp.inspect}"
  end
end
# Copy the raw document into +temp_io+, reset ourselves with #clear, then
# copy the dirent tree back in from that pristine copy (used by #repack).
def repack_using_io temp_io
  @io.rewind
  # IO.copy comes from ole/support
  IO.copy @io, temp_io
  clear
  Storage.open temp_io do |temp_ole|
    # NOTE(review): the temp root's type is flipped to :dir before copying -
    # presumably so Dirent.copy handles it uniformly; confirm against Dirent.
    temp_ole.root.type = :dir
    Dirent.copy temp_ole.root, root
  end
end
# Select the allocation table for a stream of +size+ bytes: streams at or
# above the header threshold live in big blocks, smaller ones in the sbat.
def bat_for_size size
  # note >=, not > previously.
  if size >= @header.threshold
    @bbat
  else
    @sbat
  end
end
def inspect
"#<#{self.class} io=#{@io.inspect} root=#{@root.inspect}>"
end
# A class which wraps the ole header
# The header occupies the first SIZE (0x4c) bytes of the file; the rest of
# the first 512-byte block holds the start of the meta-bat (written by
# Storage#flush, not here).
class Header < Struct.new(
  :magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift,
  :reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold,
  :sbat_start, :num_sbat, :mbat_start, :num_mbat
)
  # little-endian pack format, field-for-field with the member list above
  PACK = 'a8 a16 S2 a2 S2 a6 L3 a4 L5'
  SIZE = 0x4c
  # i have seen it pointed out that the first 4 bytes of hex,
  # 0xd0cf11e0, is supposed to spell out docfile. hmmm :)
  MAGIC = "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" # expected value of Header#magic
  # what you get if creating new header from scratch.
  # AllocationTable::EOC isn't available yet. meh.
  EOC = 0xfffffffe
  DEFAULT = [
    MAGIC, 0.chr * 16, 59, 3, "\xfe\xff", 9, 6,
    0.chr * 6, 0, 1, EOC, 0.chr * 4,
    4096, EOC, 0, EOC, 0
  ]
  # 2 basic initializations, from scratch, or from a data string.
  # from scratch will be geared towards creating a new ole object
  def initialize *values
    super(*(values.empty? ? DEFAULT : values))
    validate!
  end
  # parse a header out of the leading bytes of +str+
  def self.load str
    Header.new(*str.unpack(PACK))
  end
  # serialize back to the 76-byte on-disk representation
  def save
    to_a.pack PACK
  end
  # Sanity-check the header fields. Hard inconsistencies raise; soft ones
  # seen in real-world files only produce a warning. Returns true.
  def validate!
    raise "OLE2 signature is invalid" unless magic == MAGIC
    if num_bat == 0 or # is that valid for a completely empty file?
        # not sure about this one. basically to do max possible bat given size of mbat
        num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2) or
        # shouldn't need to use the mbat as there is enough space in the header block
        num_bat < 109 && num_mbat != 0 or
        # given the size of the header is 76, if b_shift <= 6, blocks address the header.
        s_shift > b_shift or b_shift <= 6 or b_shift >= 31 or
        # we only handle little endian
        byte_order != "\xfe\xff"
      raise "not valid OLE2 structured storage file"
    end
    # relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had
    # 3 for this value.
    # transacting_signature != "\x00" * 4 or
    if threshold != 4096 or
        num_mbat == 0 && mbat_start != AllocationTable::EOC or
        reserved != "\x00" * 6
      Log.warn "may not be a valid OLE2 structured storage file"
    end
    true
  end
end
#
# +AllocationTable+'s hold the chains corresponding to files. Given
# an initial index, <tt>AllocationTable#chain</tt> follows the chain, returning
# the blocks that make up that file.
#
# There are 2 allocation tables, the bbat, and sbat, for big and small
# blocks respectively. The block chain should be loaded using either
# <tt>Storage#read_big_blocks</tt> or <tt>Storage#read_small_blocks</tt>
# as appropriate.
#
# Whether or not big or small blocks are used for a file depends on
# whether its size is over the <tt>Header#threshold</tt> level.
#
# An <tt>Ole::Storage</tt> document is serialized as a series of directory objects,
# which are stored in blocks throughout the file. The blocks are either
# big or small, and are accessed using the <tt>AllocationTable</tt>.
#
# The bbat allocation table's data is stored in the spare room in the header
# block, and in extra blocks throughout the file as referenced by the meta
# bat. That chain is linear, as there is no higher level table.
#
class AllocationTable
  # a free block (I don't currently leave any blocks free), although I do pad out
  # the allocation table with AVAIL to the block size.
  AVAIL = 0xffffffff
  EOC = 0xfffffffe # end of a chain
  # these blocks correspond to the bat, and aren't part of a file, nor available.
  # (I don't currently output these)
  BAT = 0xfffffffd
  META_BAT = 0xfffffffc
  attr_reader :ole, :io, :table, :block_size
  def initialize ole
    @ole = ole
    @table = []
  end
  # replace the table with the unpacked 32-bit little-endian entries of +data+
  def load data
    @table = data.unpack('L*')
  end
  def truncated_table
    # this strips trailing AVAILs. come to think of it, this has the potential to break
    # bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is
    # very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC
    # at load time.
    temp = @table.reverse
    not_avail = temp.find { |b| b != AVAIL } and temp = temp[temp.index(not_avail)..-1]
    temp.reverse
  end
  # serialize the table, padded with AVAIL to a whole big block
  def save
    table = truncated_table #@table
    # pad it out some
    num = @ole.bbat.block_size / 4
    # do you really use AVAIL? they probably extend past end of file, and may shortly
    # be used for the bat. not really good.
    table += [AVAIL] * (num - (table.length % num)) if (table.length % num) != 0
    table.pack 'L*'
  end
  # rewriting this to be non-recursive. it broke on a large attachment
  # building up the chain, causing a stack error. need tail-call elimination...
  #
  # Follow the chain from block +start+, returning the array of block indices.
  def chain start
    a = []
    idx = start
    until idx >= META_BAT
      # FIX: the bound used to be `idx > @table.length`, which let an index
      # equal to the table length through - @table[idx] then returned nil and
      # the next iteration crashed with NoMethodError instead of raising the
      # intended error. valid indices are 0...@table.length.
      raise "broken allocationtable chain" if idx < 0 || idx >= @table.length
      a << idx
      idx = @table[idx]
    end
    Log.warn "invalid chain terminator #{idx}" unless idx == EOC
    a
  end
  def ranges chain, size=nil
    chain = self.chain(chain) unless Array === chain
    blocks_to_ranges chain, size
  end
  # Turn a chain (an array given by +chain+) of big blocks, optionally
  # truncated to +size+, into an array of arrays describing the stretches of
  # bytes in the file that it belongs to.
  #
  # Big blocks are of size Ole::Storage::Header#b_size, and are stored
  # directly in the parent file.
  # truncate the chain if required
  # convert chain to ranges of the block size
  # truncate final range if required
  def blocks_to_ranges chain, size=nil
    chain = chain[0...(size.to_f / block_size).ceil] if size
    ranges = chain.map { |i| [block_size * i, block_size] }
    ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size
    ranges
  end
  # quick shortcut. chain can be either a head (in which case the table is used to
  # turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
  # its not resizeable or migrateable. it probably could be resizeable though, using
  # self as the bat. but what would the first_block be?
  def open chain, size=nil
    io = RangesIO.new @io, ranges(chain, size)
    if block_given?
      begin yield io
      ensure; io.close
      end
    else io
    end
  end
  def read chain, size=nil
    open chain, size, &:read
  end
  # ----------------------
  # return the index of the first AVAIL entry, appending one if none exists
  def get_free_block
    @table.each_index { |i| return i if @table[i] == AVAIL }
    @table.push AVAIL
    @table.length - 1
  end
  # Grow or shrink the chain headed by +first_block+ to hold +size+ bytes.
  # must return first_block
  def resize_chain first_block, size
    new_num_blocks = (size / block_size.to_f).ceil
    blocks = chain first_block
    old_num_blocks = blocks.length
    if new_num_blocks < old_num_blocks
      # de-allocate some of our old blocks. TODO maybe zero them out in the file???
      (new_num_blocks...old_num_blocks).each { |i| @table[blocks[i]] = AVAIL }
      # if we have a chain, terminate it and return head, otherwise return EOC
      if new_num_blocks > 0
        @table[blocks[new_num_blocks-1]] = EOC
        first_block
      else EOC
      end
    elsif new_num_blocks > old_num_blocks
      # need some more blocks.
      last_block = blocks.last
      (new_num_blocks - old_num_blocks).times do
        block = get_free_block
        # connect the chain. handle corner case of blocks being [] initially
        if last_block
          @table[last_block] = block
        else
          first_block = block
        end
        last_block = block
        # this is just to inhibit the problem where it gets picked as being a free block
        # again next time around.
        @table[last_block] = EOC
      end
      first_block
    else first_block
    end
  end
  class Big < AllocationTable
    def initialize(*args)
      super
      @block_size = 1 << @ole.header.b_shift
      @io = @ole.io
    end
    # Big blocks are kind of -1 based, in order to not clash with the header.
    def blocks_to_ranges blocks, size
      super blocks.map { |b| b + 1 }, size
    end
  end
  class Small < AllocationTable
    def initialize(*args)
      super
      @block_size = 1 << @ole.header.s_shift
      @io = @ole.sb_file
    end
  end
end
# like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
# AllocationTable, and can be resized. used for read/write to 2 streams:
# 1. serialized dirent data
# 2. sbat table data
# 3. all dirents but through RangesIOMigrateable below
#
# Note that all internal access to first_block is through accessors, as it is sometimes
# useful to redirect it.
class RangesIOResizeable < RangesIO
  attr_reader :bat
  attr_accessor :first_block
  # +bat+ is the AllocationTable backing this stream; +first_block+ is the
  # head of its chain within that table (EOC for an empty stream).
  def initialize bat, first_block, size=nil
    @bat = bat
    self.first_block = first_block
    # hand the bat's underlying io and the chain's computed ranges to RangesIO
    super @bat.io, @bat.ranges(first_block, size)
  end
  # Grow or shrink the stream by reallocating its chain in the bat, then
  # recomputing our ranges from the new chain.
  def truncate size
    # note that old_blocks is != @ranges.length necessarily. i'm planning to write a
    # merge_ranges function that merges sequential ranges into one as an optimization.
    self.first_block = @bat.resize_chain first_block, size
    @ranges = @bat.ranges first_block, size
    @pos = @size if @pos > size
    # don't know if this is required, but we explicitly request our @io to grow if necessary
    # we never shrink it though. maybe this belongs in allocationtable, where smarter decisions
    # can be made.
    # maybe its ok to just seek out there later??
    max = @ranges.map { |pos, len| pos + len }.max || 0
    @io.truncate max if max > @io.size
    @size = size
  end
end
# like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration
# between bats based on size, and updating the dirent, instead of the ole copy back
# on close.
class RangesIOMigrateable < RangesIOResizeable
  attr_reader :dirent
  # +dirent+ supplies everything: the bat (chosen by size), the first block,
  # and the stream size.
  def initialize dirent
    @dirent = dirent
    super @dirent.ole.bat_for_size(@dirent.size), @dirent.first_block, @dirent.size
  end
  # Resize the stream, migrating its data between the small and big block
  # allocation tables when +size+ crosses the threshold boundary.
  def truncate size
    bat = @dirent.ole.bat_for_size size
    if bat != @bat
      # bat migration needed! we need to backup some data. the amount of data
      # should be <= @ole.header.threshold, so we can just hold it all in one buffer.
      # backup this
      pos = @pos
      @pos = 0
      keep = read [@size, size].min
      # this does a normal truncate to 0, removing our presence from the old bat, and
      # rewrite the dirent's first_block
      super 0
      @bat = bat
      # just change the underlying io from right under everyone :)
      @io = bat.io
      # important to do this now, before the write. as the below write will always
      # migrate us back to sbat! this will now allocate us +size+ in the new bat.
      super
      @pos = 0
      write keep
      @pos = pos
    else
      super
    end
    # now just update the file
    @dirent.size = size
  end
  # forward this to the dirent
  def first_block
    @dirent.first_block
  end
  def first_block= val
    @dirent.first_block = val
  end
end
#
# A class which wraps an ole directory entry. Can be either a directory
# (<tt>Dirent#dir?</tt>) or a file (<tt>Dirent#file?</tt>)
#
# Most interaction with <tt>Ole::Storage</tt> is through this class.
# The 2 most important functions are <tt>Dirent#children</tt>, and
# <tt>Dirent#data</tt>.
#
# was considering separate classes for dirs and files. some methods/attrs only
# applicable to one or the other.
# A class which wraps an ole directory entry, either a directory (Dirent#dir?)
# or a file (Dirent#file?). The raw 128-byte record is kept in @values and
# exposed through generated per-field accessors (see MEMBERS below).
class Dirent
  MEMBERS = [
    :name_utf16, :name_len, :type_id, :colour, :prev, :next, :child,
    :clsid, :flags, # dirs only
    :create_time_str, :modify_time_str, # files only
    :first_block, :size, :reserved
  ]
  PACK = 'a64 S C C L3 a16 L a8 a8 L2 a4'
  SIZE = 128
  # OLE FILETIMEs count from 1601-01-01
  EPOCH = DateTime.parse '1601-01-01'
  TYPE_MAP = {
    # this is temporary
    0 => :empty,
    1 => :dir,
    2 => :file,
    5 => :root
  }
  # the dirent tree is nominally red-black
  COLOUR_MAP = {
    0 => :red,
    1 => :black
  }
  # used in the next / prev / child stuff to show that the tree ends here.
  # also used for first_block for directory.
  EOT = 0xffffffff
  # All +Dirent+ names are in UTF16, which we convert
  FROM_UTF16 = Iconv.new 'utf-8', 'utf-16le'
  TO_UTF16 = Iconv.new 'utf-16le', 'utf-8'
  include Enumerable
  # raw unpacked field values, in MEMBERS order
  attr_accessor :values
  # Dirent's should be created in 1 of 2 ways, either Dirent.new ole, [:dir/:file/:root],
  # or Dirent.load '... dirent data ...'
  # its a bit clunky, but thats how it is at the moment. you can assign to type, but
  # shouldn't.
  # index into the flattened dirent array, filled in by tree rebuild/flatten
  attr_accessor :idx
  # This returns all the children of this +Dirent+. It is filled in
  # when the tree structure is recreated.
  attr_accessor :children
  attr_reader :ole, :type, :create_time, :modify_time, :name
  # Create a fresh dirent of the given +type+ (:dir, :file or :root).
  def initialize ole, type
    @ole = ole
    # this isn't really good enough. need default values put in there.
    @values = [
      0.chr * 2, 2, 0, # will get overwritten
      1, EOT, EOT, EOT,
      0.chr * 16, 0, nil, nil,
      AllocationTable::EOC, 0, 0.chr * 4]
    # maybe check types here.
    @type = type
    @create_time = @modify_time = nil
    @children = []
    if file?
      @create_time = Time.now
      @modify_time = Time.now
    end
  end
  # Deserialize a dirent from its on-disk 128 byte record without running
  # the normal constructor.
  def self.load ole, str
    # load should function without the need for the initializer.
    dirent = Dirent.allocate
    dirent.load ole, str
    dirent
  end
  def load ole, str
    @ole = ole
    @values = str.unpack PACK
    @name = FROM_UTF16.iconv name_utf16[0...name_len].sub(/\x00\x00$/, '')
    @type = TYPE_MAP[type_id] or raise "unknown type #{type_id.inspect}"
    if file?
      @create_time = Types.load_time create_time_str
      @modify_time = Types.load_time modify_time_str
    end
  end
  # only defined for files really. and the above children stuff is only for children.
  # maybe i should have some sort of File and Dir class, that subclass Dirents? a dirent
  # is just a data holder.
  # this can be used for write support if the underlying io object was opened for writing.
  # maybe take a mode string argument, and do truncation, append etc stuff.
  def open
    return nil unless file?
    io = RangesIOMigrateable.new self
    if block_given?
      begin yield io
      ensure; io.close
      end
    else io
    end
  end
  # Read at most +limit+ bytes (or all) of this file entry's stream.
  def read limit=nil
    open { |io| io.read limit }
  end
  def dir?
    # to count root as a dir.
    type != :file
  end
  def file?
    type == :file
  end
  def time
    # time is nil for streams, otherwise try to parse either of the time pairs (not
    # sure of their meaning - created / modified?)
    #@time ||= file? ? nil : (Dirent.parse_time(secs1, days1) || Dirent.parse_time(secs2, days2))
    create_time || modify_time
  end
  # iterate over direct children
  def each(&block)
    @children.each(&block)
  end
  def [] idx
    return children[idx] if Integer === idx
    # path style look up.
    # maybe take another arg to allow creation? or leave that to the filesystem
    # add on.
    # not sure if '/' is a valid char in an Dirent#name, so no splitting etc at
    # this level.
    # also what about warning about multiple hits for the same name?
    children.find { |child| idx === child.name }
  end
  # solution for the above '/' thing for now.
  def / path
    self[path]
  end
  # ascii-art rendering of the subtree rooted here, for debugging
  def to_tree
    if children and !children.empty?
      str = "- #{inspect}\n"
      children.each_with_index do |child, i|
        last = i == children.length - 1
        child.to_tree.split(/\n/).each_with_index do |line, j|
          str << " #{last ? (j == 0 ? "\\" : ' ') : '|'}#{line}\n"
        end
      end
      str
    else "- #{inspect}\n"
    end
  end
  # generate reader/writer pairs for each raw field, backed by @values
  MEMBERS.each_with_index do |sym, i|
    define_method(sym) { @values[i] }
    define_method(sym.to_s + '=') { |val| @values[i] = val }
  end
  def to_a
    @values
  end
  # flattens the tree starting from here into +dirents+. note it modifies its argument.
  def flatten dirents=[]
    @idx = dirents.length
    dirents << self
    children.each { |child| child.flatten dirents }
    self.child = Dirent.flatten_helper children
    dirents
  end
  # i think making the tree structure optimized is actually more complex than this, and
  # requires some intelligent ordering of the children based on names, but as long as
  # it is valid its ok.
  # actually, i think its ok. gsf for example only outputs a singly-linked-list, where
  # prev is always EOT.
  def self.flatten_helper children
    return EOT if children.empty?
    i = children.length / 2
    this = children[i]
    this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] }
    this.idx
  end
  attr_accessor :name, :type
  # Serialize back to the 128 byte on-disk record.
  def save
    tmp = TO_UTF16.iconv(name)
    tmp = tmp[0, 62] if tmp.length > 62
    tmp += 0.chr * 2
    self.name_len = tmp.length
    self.name_utf16 = tmp + 0.chr * (64 - tmp.length)
    begin
      self.type_id = TYPE_MAP.to_a.find { |id, name| @type == name }.first
    rescue
      raise "unknown type #{type.inspect}"
    end
    # for the case of files, it is assumed that that was handled already
    # note not dir?, so as not to override root's first_block
    self.first_block = Dirent::EOT if type == :dir
    # BUGFIX: this used to read +if 0 #file?+ with the zeroing below in the
    # else branch. but 0 is truthy in ruby (only nil/false are falsy), so the
    # else branch never ran, leaving nil time strings that break the pack for
    # freshly created dirents. zero them only when unset, so times loaded
    # from disk are preserved. TODO: serialize @create_time/@modify_time.
    self.create_time_str ||= 0.chr * 8
    self.modify_time_str ||= 0.chr * 8
    @values.pack PACK
  end
  def inspect
    str = "#<Dirent:#{name.inspect}"
    # perhaps i should remove the data snippet. its not that useful anymore.
    if file?
      tmp = read 9
      data = tmp.length == 9 ? tmp[0, 5] + '...' : tmp
      str << " size=#{size}" +
        "#{time ? ' time=' + time.to_s.inspect : nil}" +
        " data=#{data.inspect}"
    else
      # there is some dir specific stuff. like clsid, flags.
    end
    str + '>'
  end
  # --------
  # and for creation of a dirent. don't like the name. is it a file or a directory?
  # assign to type later? io will be empty.
  def new_child type
    child = Dirent.new ole, type
    children << child
    yield child if block_given?
    child
  end
  # Detach +child+ from this dirent and free its blocks.
  def delete child
    # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
    raise "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child
    # free our blocks
    child.open { |io| io.truncate 0 }
  end
  # Recursively copy the contents of +src+ into +dst+ (same type required).
  def self.copy src, dst
    # copies the contents of src to dst. must be the same type. this will throw an
    # error on copying to root. maybe this will recurse too much for big documents??
    raise unless src.type == dst.type
    dst.name = src.name
    if src.dir?
      src.children.each do |src_child|
        dst.new_child(src_child.type) { |dst_child| Dirent.copy src_child, dst_child }
      end
    else
      src.open do |src_io|
        dst.open { |dst_io| IO.copy src_io, dst_io }
      end
    end
  end
end
end
end
# quick command-line usage: dump the directory tree of the given ole file.
if $0 == __FILE__
  puts Ole::Storage.open(ARGV[0]) { |ole| ole.root.to_tree }
end
require 'ole/storage/base'
require 'ole/storage/file_system'
require 'ole/storage/meta_data'

916
lib/ole/storage/base.rb Executable file
View File

@ -0,0 +1,916 @@
require 'tempfile'
require 'ole/base'
require 'ole/types'
require 'ole/ranges_io'
module Ole # :nodoc:
#
# This class is the primary way the user interacts with an OLE storage file.
#
# = TODO
#
# * the custom header cruft for Header and Dirent needs some love.
# * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
# and, in a manner of speaking, but arguably different, Storage itself.
# they have differing api's which would be nice to rethink.
# AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
#
class Storage
# thrown for any bogus OLE file errors.
class FormatError < StandardError # :nodoc:
end
VERSION = '1.2.8.2'
# options used at creation time
attr_reader :params
# The top of the ole tree structure
attr_reader :root
# The tree structure in its original flattened form. only valid after #load, or #flush.
attr_reader :dirents
# The underlying io object to/from which the ole object is serialized, whether we
# should close it, and whether it is writeable
attr_reader :io, :close_parent, :writeable
# Low level internals, you probably shouldn't need to mess with these
attr_reader :header, :bbat, :sbat, :sb_file
# +arg+ should be either a filename, or an +IO+ object, and needs to be seekable.
# +mode+ is optional, and should be a regular mode string.
# +arg+:: either a filename (opened here, closed on #close) or a seekable IO.
# +mode+:: regular File mode string; only allowed together with a filename.
# +params+:: options hash, currently just :update_timestamps (default true).
def initialize arg, mode=nil, params={}
  # allow (arg, params) form - shift the hash over
  params, mode = mode, nil if Hash === mode
  params = {:update_timestamps => true}.merge(params)
  @params = params
  # get the io object
  @close_parent, @io = if String === arg
    mode ||= 'rb'
    [true, open(arg, mode)]
  else
    raise ArgumentError, 'unable to specify mode string with io object' if mode
    [false, arg]
  end
  # do we have this file opened for writing? don't know of a better way to tell
  # (unless we parse the mode string in the open case)
  # hmmm, note that in ruby 1.9 this doesn't work anymore. which is all the more
  # reason to use mode string parsing when available, and fall back to something like
  # io.writeable? otherwise.
  @writeable = begin
    if mode
      IO::Mode.new(mode).writeable?
    else
      # probe the io: flushing / zero-length syswrite raise IOError when read-only
      @io.flush
      # this is for the benefit of ruby-1.9
      @io.syswrite('') if @io.respond_to?(:syswrite)
      true
    end
  rescue IOError
    false
  end
  # silence undefined warning in clear
  @sb_file = nil
  # if the io object has data, we should load it, otherwise start afresh
  # this should be based on the mode string rather.
  @io.size > 0 ? load : clear
end
# somewhat similar to File.open, the open class method allows a block form where
# the Ole::Storage object is automatically closed on completion of the block.
# Somewhat similar to File.open: without a block, returns the new
# Ole::Storage object; with a block, yields it and guarantees #close runs
# when the block finishes.
def self.open arg, mode=nil, params={}
  ole = new arg, mode, params
  return ole unless block_given?
  begin
    yield ole
  ensure
    ole.close
  end
end
# load document from file.
#
# TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :)
#
# 1. reterminate any chain not ending in EOC.
# compare file size with actually allocated blocks per file.
# 2. pass through all chain heads looking for collisions, and making sure nothing points to them
# (ie they are really heads). in both sbat and mbat
# 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
# in the bat for them.
# 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size,
# (eg what is used for truncate in #flush), then maybe add some sort of message about that. it
# will be automatically thrown away at close time.
# Parse an existing ole document from @io: header, big/small allocation
# tables, and the dirent tree (@root / @dirents).
def load
  # we always read 512 for the header block. if the block size ends up being different,
  # what happens to the 109 fat entries. are there more/less entries?
  @io.rewind
  header_block = @io.read 512
  @header = Header.new header_block
  # create an empty bbat.
  @bbat = AllocationTable::Big.new self
  # the first 109 bbat chain entries follow the header; the rest come from
  # the meta-bat linked list (last entry of each mbat block points to the next)
  bbat_chain = header_block[Header::SIZE..-1].unpack 'V*'
  mbat_block = @header.mbat_start
  @header.num_mbat.times do
    blocks = @bbat.read([mbat_block]).unpack 'V*'
    mbat_block = blocks.pop
    bbat_chain += blocks
  end
  # am i using num_bat in the right way?
  @bbat.load @bbat.read(bbat_chain[0, @header.num_bat])
  # get block chain for directories, read it, then split it into chunks and load the
  # directory entries. semantics changed - used to cut at first dir where dir.type == 0
  @dirents = @bbat.read(@header.dirent_start).to_enum(:each_chunk, Dirent::SIZE).
    map { |str| Dirent.new self, str }.reject { |d| d.type_id == 0 }
  # now reorder from flat into a tree
  # links are stored in some kind of balanced binary tree
  # check that everything is visited at least, and at most once
  # similarly with the blocks of the file.
  # was thinking of moving this to Dirent.to_tree instead.
  class << @dirents
    # in-order walk of the prev/next/child binary tree, assigning idx
    def to_tree idx=0
      return [] if idx == Dirent::EOT
      d = self[idx]
      d.children = to_tree d.child
      raise FormatError, "directory #{d.inspect} used twice" if d.idx
      d.idx = idx
      to_tree(d.prev) + [d] + to_tree(d.next)
    end
  end
  @root = @dirents.to_tree.first
  Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
  unused = @dirents.reject(&:idx).length
  Log.warn "#{unused} unused directories" if unused > 0
  # FIXME i don't currently use @header.num_sbat which i should
  # hmm. nor do i write it. it means what exactly again?
  # which mode to use here?
  # the root dirent's stream holds the small-block data
  @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
  @sbat = AllocationTable::Small.new self
  @sbat.load @bbat.read(@header.sbat_start)
end
# Close the document, flushing metadata first when opened writeable.
# NOTE: @sb_file is closed before flush so the root dirent's backing stream
# (first_block/size) is up to date when flush serializes it.
def close
  @sb_file.close
  flush if @writeable
  @io.close if @close_parent
end
# the flush method is the main "save" method. all file contents are always
# written directly to the file by the RangesIO objects, all this method does
# is write out all the file meta data - dirents, allocation tables, file header
# etc.
#
# maybe add an option to zero the padding, and any remaining avail blocks in the
# allocation table.
#
# TODO: long and overly complex. simplify and test better. eg, perhaps move serialization
# of bbat to AllocationTable::Big.
# The flush method is the main "save" method - file contents are always
# written directly by the RangesIO objects, so this only serializes the
# metadata: dirents, sbat, bbat/mbat, and finally the header.
# (change: removed the unused local +before = @io.size+ that shadowed the
# pre-truncate size but was never read.)
def flush
  # update root dirent, and flatten dirent tree
  @root.name = 'Root Entry'
  @root.first_block = @sb_file.first_block
  @root.size = @sb_file.size
  @dirents = @root.flatten
  # serialize the dirents using the bbat
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
    @dirents.each { |dirent| io.write dirent.to_s }
    padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
    io.write 0.chr * padding
    @header.dirent_start = io.first_block
  end
  # serialize the sbat
  # perhaps the blocks used by the sbat should be marked with BAT?
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
    io.write @sbat.to_s
    @header.sbat_start = io.first_block
    @header.num_sbat = @bbat.chain(@header.sbat_start).length
  end
  # create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using
  # truncate. then when its time to write, convert that chain and some chunk of blocks at
  # the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its
  # done.
  # this is perhaps not good, as we reclaim all bat blocks here, which
  # may include the sbat we just wrote. FIXME
  @bbat.map! do |b|
    b == AllocationTable::BAT || b == AllocationTable::META_BAT ? AllocationTable::AVAIL : b
  end
  # currently we use a loop. this could be better, but basically,
  # the act of writing out the bat, itself requires blocks which get
  # recorded in the bat.
  #
  # i'm sure that there'd be some simpler closed form solution to this. solve
  # recursive func:
  #
  # num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
  # bbat_len = initial_bbat_len + num_mbat_blocks
  # mbat_len = ceil(bbat_len * 4 / block_size)
  #
  # the actual bbat allocation table is itself stored throughout the file, and that chain
  # is stored in the initial blocks, and the mbat blocks.
  num_mbat_blocks = 0
  io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
  # truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
  # contiguous chunk at the end.
  # hmmm, i think this truncate should be matched with a truncate of the underlying io. if you
  # delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can
  # be fixed easily, add an io truncate
  @bbat.truncate!
  @io.truncate @bbat.block_size * (@bbat.length + 1)
  while true
    # get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
    # the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
    # progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
    # mbat must remain contiguous.
    bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
    # now storing the excess mbat blocks also increases the size of the bbat:
    new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / (@bbat.block_size.to_f - 4)).ceil
    if new_num_mbat_blocks != num_mbat_blocks
      # need more space for the mbat.
      num_mbat_blocks = new_num_mbat_blocks
    elsif io.size != bbat_data_len
      # need more space for the bat
      # this may grow the bbat, depending on existing available blocks
      io.truncate bbat_data_len
    else
      break
    end
  end
  # now extract the info we want:
  ranges = io.ranges
  bbat_chain = @bbat.chain io.first_block
  io.close
  bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
  # tack on the mbat stuff
  @header.num_bat = bbat_chain.length
  mbat_blocks = (0...num_mbat_blocks).map do
    block = @bbat.free_block
    @bbat[block] = AllocationTable::META_BAT
    block
  end
  @header.mbat_start = mbat_blocks.first || AllocationTable::EOC
  # now finally write the bbat, using a not resizable io.
  # the mode here will be 'r', which allows write atm.
  RangesIO.open(@io, :ranges => ranges) { |f| f.write @bbat.to_s }
  # this is the mbat. pad it out.
  bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
  @header.num_mbat = num_mbat_blocks
  if num_mbat_blocks != 0
    # write out the mbat blocks now. first of all, where are they going to be?
    mbat_data = bbat_chain[109..-1]
    # expand the mbat_data to include the linked list forward pointers.
    mbat_data = mbat_data.to_enum(:each_slice, @bbat.block_size / 4 - 1).to_a.
      zip(mbat_blocks[1..-1] + [nil]).map { |a, b| b ? a + [b] : a }
    # pad out the last one.
    mbat_data.last.push(*([AllocationTable::AVAIL] * (@bbat.block_size / 4 - mbat_data.last.length)))
    RangesIO.open @io, :ranges => @bbat.ranges(mbat_blocks) do |f|
      f.write mbat_data.flatten.pack('V*')
    end
  end
  # now seek back and write the header out
  @io.seek 0
  @io.write @header.to_s + bbat_chain[0, 109].pack('V*')
  @io.flush
end
# Reset to the equivalent of a freshly created, empty ole document,
# discarding any existing contents of @io.
def clear
  # initialize to equivalent of loading an empty ole document.
  Log.warn 'creating new ole storage object on non-writable io' unless @writeable
  @header = Header.new
  @bbat = AllocationTable::Big.new self
  @root = Dirent.new self, :type => :root, :name => 'Root Entry'
  @dirents = [@root]
  @root.idx = 0
  # release the old small block file before replacing it
  @sb_file.close if @sb_file
  @sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC
  @sbat = AllocationTable::Small.new self
  # throw everything else the hell away
  @io.truncate 0
end
# could be useful with mis-behaving ole documents. or to just clean them up.
# Rewrite the document compactly via a temporary copy - useful for cleaning
# up mis-behaving or fragmented ole documents. +temp+ selects the scratch
# backing: :file (a Tempfile) or :mem (a StringIO).
def repack temp=:file
  if temp == :file
    Tempfile.open 'ole-repack' do |backing|
      backing.binmode
      repack_using_io backing
    end
  elsif temp == :mem
    StringIO.open('', &method(:repack_using_io))
  else
    raise ArgumentError, "unknown temp backing #{temp.inspect}"
  end
end
# Copy the raw document into +temp_io+, wipe ourselves with #clear, then
# recursively copy the dirent tree back from the temporary copy.
def repack_using_io temp_io
  @io.rewind
  # IO.copy is an extension provided by ole/support
  IO.copy @io, temp_io
  clear
  Storage.open temp_io, nil, @params do |temp_ole|
    #temp_ole.root.type = :dir
    Dirent.copy temp_ole.root, root
  end
end
# Pick the allocation table for a stream of +size+ bytes: streams at or
# above the header threshold use big blocks, smaller ones the small block
# file. (note >=, not > previously.)
def bat_for_size size
  return @bbat if size >= @header.threshold
  @sbat
end
# Terse debug representation, showing the backing io and the tree root.
def inspect
  format '#<%s io=%s root=%s>', self.class, @io.inspect, @root.inspect
end
#
# A class which wraps the ole header
#
# Header.new can be both used to load from a string, or to create from
# defaults. Serialization is accomplished with the #to_s method.
#
class Header < Struct.new(
  :magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift,
  :reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold,
  :sbat_start, :num_sbat, :mbat_start, :num_mbat
)
  PACK = 'a8 a16 v2 a2 v2 a6 V3 a4 V5'
  SIZE = 0x4c
  # i have seen it pointed out that the first 4 bytes of hex,
  # 0xd0cf11e0, is supposed to spell out docfile. hmmm :)
  MAGIC = "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" # expected value of Header#magic
  # what you get if creating new header from scratch.
  # AllocationTable::EOC isn't available yet. meh.
  EOC = 0xfffffffe
  DEFAULT = [
    MAGIC, 0.chr * 16, 59, 3, "\xfe\xff", 9, 6,
    0.chr * 6, 0, 1, EOC, 0.chr * 4,
    4096, EOC, 0, EOC, 0
  ]
  # +values+ may be the raw header string (unpacked with PACK) or an array
  # of field values; defaults to a fresh empty-document header. validates.
  def initialize values=DEFAULT
    values = values.unpack(PACK) if String === values
    super(*values)
    validate!
  end
  # serialize back to the binary on-disk form
  def to_s
    to_a.pack PACK
  end
  # raises FormatError for fatal inconsistencies; merely warns (via Log)
  # about suspicious but tolerable values. returns true on success.
  def validate!
    raise FormatError, "OLE2 signature is invalid" unless magic == MAGIC
    if num_bat == 0 or # is that valid for a completely empty file?
      # not sure about this one. basically to do max possible bat given size of mbat
      num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2) or
      # shouldn't need to use the mbat as there is enough space in the header block
      num_bat < 109 && num_mbat != 0 or
      # given the size of the header is 76, if b_shift <= 6, blocks address the header.
      s_shift > b_shift or b_shift <= 6 or b_shift >= 31 or
      # we only handle little endian
      byte_order != "\xfe\xff"
      raise FormatError, "not valid OLE2 structured storage file"
    end
    # relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had
    # 3 for this value.
    # transacting_signature != "\x00" * 4 or
    if threshold != 4096 or
      num_mbat == 0 && mbat_start != AllocationTable::EOC or
      reserved != "\x00" * 6
      Log.warn "may not be a valid OLE2 structured storage file"
    end
    true
  end
end
#
# +AllocationTable+'s hold the chains corresponding to files. Given
# an initial index, <tt>AllocationTable#chain</tt> follows the chain, returning
# the blocks that make up that file.
#
# There are 2 allocation tables, the bbat, and sbat, for big and small
# blocks respectively. The block chain should be loaded using either
# <tt>Storage#read_big_blocks</tt> or <tt>Storage#read_small_blocks</tt>
# as appropriate.
#
# Whether or not big or small blocks are used for a file depends on
# whether its size is over the <tt>Header#threshold</tt> level.
#
# An <tt>Ole::Storage</tt> document is serialized as a series of directory objects,
# which are stored in blocks throughout the file. The blocks are either
# big or small, and are accessed using the <tt>AllocationTable</tt>.
#
# The bbat allocation table's data is stored in the spare room in the header
# block, and in extra blocks throughout the file as referenced by the meta
# bat. That chain is linear, as there is no higher level table.
#
# AllocationTable.new is used to create an empty table. It can parse a string
# with the #load method. Serialization is accomplished with the #to_s method.
#
# The table itself is an Array of 32-bit entries: each index is a block
# number and each value is the next block in the chain (or a sentinel).
class AllocationTable < Array
  # a free block (I don't currently leave any blocks free), although I do pad out
  # the allocation table with AVAIL to the block size.
  AVAIL = 0xffffffff
  EOC = 0xfffffffe # end of a chain
  # these blocks are used for storing the allocation table chains
  BAT = 0xfffffffd
  META_BAT = 0xfffffffc
  attr_reader :ole, :io, :block_size
  def initialize ole
    @ole = ole
    # @sparse tracks whether the table may contain AVAIL entries - a speedup
    # for free_block below.
    @sparse = true
    super()
  end
  # replace table contents with 32-bit little-endian entries from +data+
  def load data
    replace data.unpack('V*')
  end
  # return a copy of the table with trailing AVAIL entries stripped
  def truncate
    # this strips trailing AVAILs. come to think of it, this has the potential to break
    # bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is
    # very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC
    # at load time.
    temp = reverse
    not_avail = temp.find { |b| b != AVAIL } and temp = temp[temp.index(not_avail)..-1]
    temp.reverse
  end
  # in-place version of #truncate
  def truncate!
    replace truncate
  end
  # serialize, padded with AVAIL out to a whole number of (big) blocks
  def to_s
    table = truncate
    # pad it out some
    num = @ole.bbat.block_size / 4
    # do you really use AVAIL? they probably extend past end of file, and may shortly
    # be used for the bat. not really good.
    table += [AVAIL] * (num - (table.length % num)) if (table.length % num) != 0
    table.pack 'V*'
  end
  # follow the chain from head block +idx+, returning the list of blocks.
  # rewrote this to be non-recursive as it broke on a large attachment
  # chain with a stack error
  def chain idx
    a = []
    until idx >= META_BAT
      raise FormatError, "broken allocationtable chain" if idx < 0 || idx > length
      a << idx
      idx = self[idx]
    end
    Log.warn "invalid chain terminator #{idx}" unless idx == EOC
    a
  end
  # Turn a chain (an array given by +chain+) of blocks (optionally
  # truncated to +size+) into an array of arrays describing the stretches of
  # bytes in the file that it belongs to.
  #
  # The blocks are Big or Small blocks depending on the table type.
  def blocks_to_ranges chain, size=nil
    # truncate the chain if required
    chain = chain[0...(size.to_f / block_size).ceil] if size
    # convert chain to ranges of the block size
    ranges = chain.map { |i| [block_size * i, block_size] }
    # truncate final range if required
    ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size
    ranges
  end
  # +chain+ may be a head block (resolved through the table) or an already
  # resolved block array
  def ranges chain, size=nil
    chain = self.chain(chain) unless Array === chain
    blocks_to_ranges chain, size
  end
  # quick shortcut. chain can be either a head (in which case the table is used to
  # turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
  def open chain, size=nil, &block
    RangesIO.open @io, :ranges => ranges(chain, size), &block
  end
  def read chain, size=nil
    open chain, size, &:read
  end
  # catch any method that may add an AVAIL somewhere in the middle, thus invalidating
  # the @sparse speedup for free_block. annoying using eval, but define_method won't
  # work for this.
  # FIXME
  [:map!, :collect!].each do |name|
    eval <<-END
      def #{name}(*args, &block)
        @sparse = true
        super
      end
    END
  end
  def []= idx, val
    @sparse = true if val == AVAIL
    super
  end
  # index of a block free for allocation, extending the table if none exists
  def free_block
    if @sparse
      # note: relies on no chain ever using block 0... index 0 would be
      # returned here as a falsy-looking but valid hit otherwise.
      i = index(AVAIL) and return i
    end
    @sparse = false
    push AVAIL
    length - 1
  end
  # must return first_block. modifies +blocks+ in place
  def resize_chain blocks, size
    new_num_blocks = (size / block_size.to_f).ceil
    old_num_blocks = blocks.length
    if new_num_blocks < old_num_blocks
      # de-allocate some of our old blocks. TODO maybe zero them out in the file???
      (new_num_blocks...old_num_blocks).each { |i| self[blocks[i]] = AVAIL }
      self[blocks[new_num_blocks-1]] = EOC if new_num_blocks > 0
      blocks.slice! new_num_blocks..-1
    elsif new_num_blocks > old_num_blocks
      # need some more blocks.
      last_block = blocks.last
      (new_num_blocks - old_num_blocks).times do
        block = free_block
        # connect the chain. handle corner case of blocks being [] initially
        self[last_block] = block if last_block
        blocks << block
        last_block = block
        self[last_block] = EOC
      end
    end
    # update ranges, and return that also now
    blocks
  end
  # table of big blocks, backed directly by the document's io
  class Big < AllocationTable
    def initialize(*args)
      super
      @block_size = 1 << @ole.header.b_shift
      @io = @ole.io
    end
    # Big blocks are kind of -1 based, in order to not clash with the header.
    def blocks_to_ranges blocks, size
      super blocks.map { |b| b + 1 }, size
    end
  end
  # table of small blocks, backed by the root entry's stream (sb_file)
  class Small < AllocationTable
    def initialize(*args)
      super
      @block_size = 1 << @ole.header.s_shift
      @io = @ole.sb_file
    end
  end
end
# like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
# AllocationTable, and can be resized. used for read/write to 3 streams:
# 1. serialized dirent data
# 2. sbat table data
# 3. all dirents, but through RangesIOMigrateable below
#
# Note that all internal access to first_block is through accessors, as it is sometimes
# useful to redirect it.
class RangesIOResizeable < RangesIO
  # the AllocationTable that backs this stream's block chain
  attr_reader :bat
  # head block of the chain; accessor so subclasses can redirect it
  attr_accessor :first_block
  # +bat+:: the AllocationTable to allocate from
  # +mode+:: optional mode string (may be omitted, hash shifts over)
  # +params+:: :first_block (required) and optional :size
  def initialize bat, mode='r', params={}
    mode, params = 'r', mode if Hash === mode
    first_block, size = params.values_at :first_block, :size
    raise ArgumentError, 'must specify first_block' unless first_block
    @bat = bat
    self.first_block = first_block
    # we now cache the blocks chain, for faster resizing.
    @blocks = @bat.chain first_block
    super @bat.io, mode, :ranges => @bat.ranges(@blocks, size)
  end
  # grow or shrink the stream to +size+ bytes, re-allocating blocks in the
  # bat and refreshing the cached chain/ranges.
  def truncate size
    # note that old_blocks is != @ranges.length necessarily. i'm planning to write a
    # merge_ranges function that merges sequential ranges into one as an optimization.
    @bat.resize_chain @blocks, size
    @ranges = @bat.ranges @blocks, size
    @pos = @size if @pos > size
    self.first_block = @blocks.empty? ? AllocationTable::EOC : @blocks.first
    # don't know if this is required, but we explicitly request our @io to grow if necessary
    # we never shrink it though. maybe this belongs in allocationtable, where smarter decisions
    # can be made.
    # maybe its ok to just seek out there later??
    max = @ranges.map { |pos, len| pos + len }.max || 0
    @io.truncate max if max > @io.size
    @size = size
  end
end
# like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration
# between bats based on size, and updating the dirent.
class RangesIOMigrateable < RangesIOResizeable
  # the Dirent whose stream this io wraps; it owns first_block and size
  attr_reader :dirent
  def initialize dirent, mode='r'
    @dirent = dirent
    # pick the big or small bat based on the stream's current size
    super @dirent.ole.bat_for_size(@dirent.size), mode,
      :first_block => @dirent.first_block, :size => @dirent.size
  end
  # Resize, migrating data between the big and small allocation tables when
  # +size+ crosses the header threshold.
  def truncate size
    bat = @dirent.ole.bat_for_size size
    # compare table *kinds*, not instances, to decide whether to migrate
    if bat.class != @bat.class
      # bat migration needed! we need to backup some data. the amount of data
      # should be <= @ole.header.threshold, so we can just hold it all in one buffer.
      # backup this
      pos = @pos
      @pos = 0
      keep = read [@size, size].min
      # this does a normal truncate to 0, removing our presence from the old bat, and
      # rewrite the dirent's first_block
      super 0
      @bat = bat
      # just change the underlying io from right under everyone :)
      @io = bat.io
      # important to do this now, before the write. as the below write will always
      # migrate us back to sbat! this will now allocate us +size+ in the new bat.
      super
      @pos = 0
      write keep
      @pos = pos
    else
      super
    end
    # now just update the file
    @dirent.size = size
  end
  # forward this to the dirent
  def first_block
    @dirent.first_block
  end
  def first_block= val
    @dirent.first_block = val
  end
end
#
# A class which wraps an ole directory entry. Can be either a directory
# (<tt>Dirent#dir?</tt>) or a file (<tt>Dirent#file?</tt>)
#
# Most interaction with <tt>Ole::Storage</tt> is through this class.
# The 2 most important functions are <tt>Dirent#children</tt>, and
# <tt>Dirent#data</tt>.
#
# was considering separate classes for dirs and files. some methods/attrs only
# applicable to one or the other.
#
# As with the other classes, #to_s performs the serialization.
#
class Dirent < Struct.new(
:name_utf16, :name_len, :type_id, :colour, :prev, :next, :child,
:clsid, :flags, # dirs only
:create_time_str, :modify_time_str, # files only
:first_block, :size, :reserved
)
include RecursivelyEnumerable
PACK = 'a64 v C C V3 a16 V a8 a8 V2 a4'
SIZE = 128
TYPE_MAP = {
# this is temporary
0 => :empty,
1 => :dir,
2 => :file,
5 => :root
}
# something to do with the fact that the tree is supposed to be red-black
COLOUR_MAP = {
0 => :red,
1 => :black
}
# used in the next / prev / child stuff to show that the tree ends here.
# also used for first_block for directory.
EOT = 0xffffffff
DEFAULT = [
0.chr * 2, 2, 0, # will get overwritten
1, EOT, EOT, EOT,
0.chr * 16, 0, nil, nil,
AllocationTable::EOC, 0, 0.chr * 4
]
# i think its just used by the tree building
attr_accessor :idx
# This returns all the children of this +Dirent+. It is filled in
# when the tree structure is recreated.
attr_accessor :children
attr_accessor :name
attr_reader :ole, :type, :create_time, :modify_time
# Construct a dirent for +ole+, either from on-disk +values+ (a raw 128-byte
# string unpacked with PACK, or a field array) or from the DEFAULT template
# plus +params+ (:name, :type).
def initialize ole, values=DEFAULT, params={}
  @ole = ole
  # allow (ole, params) form - shift the hash over
  values, params = DEFAULT, values if Hash === values
  values = values.unpack(PACK) if String === values
  super(*values)
  # extra parsing from the actual struct values
  @name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len])
  @type = if params[:type]
    unless TYPE_MAP.values.include?(params[:type])
      raise ArgumentError, "unknown type #{params[:type].inspect}"
    end
    params[:type]
  else
    TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}"
  end
  # further extra type specific stuff
  if file?
    default_time = @ole.params[:update_timestamps] ? Time.now : nil
    @create_time ||= default_time
    @modify_time ||= default_time
    @create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str
    # BUGFIX: this previously parsed create_time_str here too (copy/paste
    # error), so the loaded modify time was always the create time.
    @modify_time = Types::Variant.load(Types::VT_FILETIME, modify_time_str) if modify_time_str
    @children = nil
  else
    @create_time = nil
    @modify_time = nil
    self.size = 0 unless @type == :root
    @children = []
  end
  # to silence warnings. used for tree building at load time
  # only.
  @idx = nil
end
def open mode='r'
raise Errno::EISDIR unless file?
io = RangesIOMigrateable.new self, mode
# TODO work on the mode string stuff a bit more.
# maybe let the io object know about the mode, so it can refuse
# to work for read/write appropriately. maybe redefine all unusable
# methods using singleton class to throw errors.
# for now, i just want to implement truncation on use of 'w'. later,
# i need to do 'a' etc.
case mode
when 'r', 'r+'
# as i don't enforce reading/writing, nothing changes here. kind of
# need to enforce tt if i want modify times to work better.
@modify_time = Time.now if mode == 'r+'
when 'w'
@modify_time = Time.now
# io.truncate 0
#else
# raise NotImplementedError, "unsupported mode - #{mode.inspect}"
end
if block_given?
begin yield io
ensure; io.close
end
else io
end
end
def read limit=nil
open { |io| io.read limit }
end
def file?
type == :file
end
def dir?
# to count root as a dir.
!file?
end
# maybe need some options regarding case sensitivity.
def / name
children.find { |child| name === child.name }
end
def [] idx
if String === idx
#warn 'String form of Dirent#[] is deprecated'
self / idx
else
super
end
end
# move to ruby-msg. and remove from here
def time
#warn 'Dirent#time is deprecated'
create_time || modify_time
end
def each_child(&block)
@children.each(&block)
end
# flattens the tree starting from here into +dirents+. note it modifies its argument.
def flatten dirents=[]
@idx = dirents.length
dirents << self
if file?
self.prev = self.next = self.child = EOT
else
children.each { |child| child.flatten dirents }
self.child = Dirent.flatten_helper children
end
dirents
end
# i think making the tree structure optimized is actually more complex than this, and
# requires some intelligent ordering of the children based on names, but as long as
# it is valid its ok.
# actually, i think its ok. gsf for example only outputs a singly-linked-list, where
# prev is always EOT.
def self.flatten_helper children
return EOT if children.empty?
i = children.length / 2
this = children[i]
this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] }
this.idx
end
def to_s
tmp = Types::Variant.dump(Types::VT_LPWSTR, name)
tmp = tmp[0, 62] if tmp.length > 62
tmp += 0.chr * 2
self.name_len = tmp.length
self.name_utf16 = tmp + 0.chr * (64 - tmp.length)
# type_id can perhaps be set in the initializer, as its read only now.
self.type_id = TYPE_MAP.to_a.find { |id, name| @type == name }.first
# for the case of files, it is assumed that that was handled already
# note not dir?, so as not to override root's first_block
self.first_block = Dirent::EOT if type == :dir
if file?
# this is messed up. it changes the time stamps regardless of whether the file
# was actually touched. instead, any open call with a writeable mode, should update
# the modify time. create time would be set in new.
if @ole.params[:update_timestamps]
self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time
self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time
end
else
self.create_time_str = 0.chr * 8
self.modify_time_str = 0.chr * 8
end
to_a.pack PACK
end
def inspect
str = "#<Dirent:#{name.inspect}"
# perhaps i should remove the data snippet. its not that useful anymore.
# there is also some dir specific stuff. like clsid, flags, that i should
# probably include
if file?
tmp = read 9
data = tmp.length == 9 ? tmp[0, 5] + '...' : tmp
str << " size=#{size}" +
"#{modify_time ? ' modify_time=' + modify_time.to_s.inspect : nil}" +
" data=#{data.inspect}"
end
str + '>'
end
def delete child
# remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child
# free our blocks
child.open { |io| io.truncate 0 }
end
def self.copy src, dst
# copies the contents of src to dst. must be the same type. this will throw an
# error on copying to root. maybe this will recurse too much for big documents??
raise ArgumentError, 'differing types' if src.file? and !dst.file?
dst.name = src.name
if src.dir?
src.children.each do |src_child|
dst_child = Dirent.new dst.ole, :type => src_child.type
dst.children << dst_child
Dirent.copy src_child, dst_child
end
else
src.open do |src_io|
dst.open { |dst_io| IO.copy src_io, dst_io }
end
end
end
end
end
end

View File

@ -0,0 +1,423 @@
#
# = Introduction
#
# This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>.
#
# = TODO
#
# - need to implement some more IO functions on RangesIO, like #puts, #print
# etc, like AbstractOutputStream from zipfile.
#
# - check Dir.mkdir, and File.open, and File.rename, to add in filename
# length checks (max 32 / 31 or something).
# do the automatic truncation, and add in any necessary warnings.
#
# - File.split('a/') == File.split('a') == ['.', 'a']
# the implication of this, is that things that try to force directory
# don't work. like, File.rename('a', 'b'), should work if a is a file
# or directory, but File.rename('a/', 'b') should only work if a is
# a directory. tricky, need to clean things up a bit more.
# i think a general path name => dirent method would work, with flags
# about what should raise an error.
#
# - Need to look at streamlining things after getting all the tests passing,
# as this file's getting pretty long - almost half the real implementation.
# and is probably more inefficient than necessary.
# too many exceptions in the expected path of certain functions.
#
# - should look at profiles before and after switching ruby-msg to use
# the filesystem api.
#
require 'ole/storage'
module Ole # :nodoc:
class Storage
def file
@file ||= FileClass.new self
end
def dir
@dir ||= DirClass.new self
end
# tries to get a dirent for path. return nil if it doesn't exist
# (change it)
def dirent_from_path path
dirent = @root
path = file.expand_path path
path = path.sub(/^\/*/, '').sub(/\/*$/, '').split(/\/+/)
until path.empty?
return nil if dirent.file?
return nil unless dirent = dirent/path.shift
end
dirent
end
class FileClass
class Stat
attr_reader :ftype, :size, :blocks, :blksize
attr_reader :nlink, :uid, :gid, :dev, :rdev, :ino
def initialize dirent
@dirent = dirent
@size = dirent.size
if file?
@ftype = 'file'
bat = dirent.ole.bat_for_size(dirent.size)
@blocks = bat.chain(dirent.first_block).length
@blksize = bat.block_size
else
@ftype = 'directory'
@blocks = 0
@blksize = 0
end
# a lot of these are bogus. ole file format has no analogs
@nlink = 1
@uid, @gid = 0, 0
@dev, @rdev = 0, 0
@ino = 0
# need to add times - atime, mtime, ctime.
end
alias rdev_major :rdev
alias rdev_minor :rdev
def file?
@dirent.file?
end
def directory?
@dirent.dir?
end
def size?
size if file?
end
def inspect
pairs = (instance_variables - ['@dirent']).map do |n|
"#{n[1..-1]}=#{instance_variable_get n}"
end
"#<#{self.class} #{pairs * ', '}>"
end
end
def initialize ole
@ole = ole
end
def expand_path path
# get the raw stored pwd value (its blank for root)
pwd = @ole.dir.instance_variable_get :@pwd
# its only absolute if it starts with a '/'
path = "#{pwd}/#{path}" unless path =~ /^\//
# at this point its already absolute. we use File.expand_path
# just for the .. and . handling
# No longer use RUBY_PLATFORM =~ /win/ as it matches darwin. better way?
File.expand_path(path)[File::ALT_SEPARATOR == "\\" ? (2..-1) : (0..-1)]
end
# +orig_path+ is just so that we can use the requested path
# in the error messages even if it has been already modified
def dirent_from_path path, orig_path=nil
orig_path ||= path
dirent = @ole.dirent_from_path path
raise Errno::ENOENT, orig_path unless dirent
raise Errno::EISDIR, orig_path if dirent.dir?
dirent
end
private :dirent_from_path
def exists? path
!!@ole.dirent_from_path(path)
end
alias exist? :exists?
def file? path
dirent = @ole.dirent_from_path path
dirent and dirent.file?
end
def directory? path
dirent = @ole.dirent_from_path path
dirent and dirent.dir?
end
def open path, mode='r', &block
if IO::Mode.new(mode).create?
begin
dirent = dirent_from_path path
rescue Errno::ENOENT
# maybe instead of repeating this everywhere, i should have
# a get_parent_dirent function.
parent_path, basename = File.split expand_path(path)
parent = @ole.dir.send :dirent_from_path, parent_path, path
parent.children << dirent = Dirent.new(@ole, :type => :file, :name => basename)
end
else
dirent = dirent_from_path path
end
dirent.open mode, &block
end
# explicit wrapper instead of alias to inhibit block
def new path, mode='r'
open path, mode
end
def size path
dirent_from_path(path).size
rescue Errno::EISDIR
# kind of arbitrary. I'm getting 4096 from ::File, but
# the zip tests want 0.
0
end
def size? path
dirent_from_path(path).size
# any other exceptions i need to rescue?
rescue Errno::ENOENT, Errno::EISDIR
nil
end
def stat path
# we do this to allow dirs.
dirent = @ole.dirent_from_path path
raise Errno::ENOENT, path unless dirent
Stat.new dirent
end
def read path
open path, &:read
end
# most of the work this function does is moving the dirent between
# 2 parents. the actual name changing is quite simple.
# File.rename can move a file into another folder, which is why i've
# done it too, though i think its not always possible...
#
# FIXME File.rename can be used for directories too....
def rename from_path, to_path
# check what we want to rename from exists. do it this
# way to allow directories.
dirent = @ole.dirent_from_path from_path
raise Errno::ENOENT, from_path unless dirent
# delete what we want to rename to if necessary
begin
unlink to_path
rescue Errno::ENOENT
# we actually get here, but rcov doesn't think so. add 1 + 1 to
# keep rcov happy for now... :)
1 + 1
end
# reparent the dirent
from_parent_path, from_basename = File.split expand_path(from_path)
to_parent_path, to_basename = File.split expand_path(to_path)
from_parent = @ole.dir.send :dirent_from_path, from_parent_path, from_path
to_parent = @ole.dir.send :dirent_from_path, to_parent_path, to_path
from_parent.children.delete dirent
# and also change its name
dirent.name = to_basename
to_parent.children << dirent
0
end
# crappy copy from Dir.
def unlink(*paths)
paths.each do |path|
dirent = @ole.dirent_from_path path
# i think we should free all of our blocks from the
# allocation table.
# i think if you run repack, all free blocks should get zeroed,
# but currently the original data is there unmodified.
open(path) { |f| f.truncate 0 }
# remove ourself from our parent, so we won't be part of the dir
# tree at save time.
parent_path, basename = File.split expand_path(path)
parent = @ole.dir.send :dirent_from_path, parent_path, path
parent.children.delete dirent
end
paths.length # hmmm. as per ::File ?
end
alias delete :unlink
end
#
# an *instance* of this class is supposed to provide similar methods
# to the class methods of Dir itself.
#
# pretty complete. like zip/zipfilesystem's implementation, i provide
# everything except chroot and glob. glob could be done with a glob
# to regex regex, and then simply match in the entries array... although
# recursive glob complicates that somewhat.
#
# Dir.chroot, Dir.glob, Dir.[], and Dir.tmpdir is the complete list.
class DirClass
def initialize ole
@ole = ole
@pwd = ''
end
# +orig_path+ is just so that we can use the requested path
# in the error messages even if it has been already modified
def dirent_from_path path, orig_path=nil
orig_path ||= path
dirent = @ole.dirent_from_path path
raise Errno::ENOENT, orig_path unless dirent
raise Errno::ENOTDIR, orig_path unless dirent.dir?
dirent
end
private :dirent_from_path
def open path
dir = Dir.new path, entries(path)
if block_given?
yield dir
else
dir
end
end
# as for file, explicit alias to inhibit block
def new path
open path
end
# pwd is always stored without the trailing slash. we handle
# the root case here
def pwd
if @pwd.empty?
'/'
else
@pwd
end
end
alias getwd :pwd
def chdir orig_path
# make path absolute, squeeze slashes, and remove trailing slash
path = @ole.file.expand_path(orig_path).gsub(/\/+/, '/').sub(/\/$/, '')
# this is just for the side effects of the exceptions if invalid
dirent_from_path path, orig_path
if block_given?
old_pwd = @pwd
begin
@pwd = path
yield
ensure
@pwd = old_pwd
end
else
@pwd = path
0
end
end
def entries path
dirent = dirent_from_path path
# Not sure about adding on the dots...
entries = %w[. ..] + dirent.children.map(&:name)
# do some checks about un-reachable files
seen = {}
entries.each do |n|
Log.warn "inaccessible file (filename contains slash) - #{n.inspect}" if n['/']
Log.warn "inaccessible file (duplicate filename) - #{n.inspect}" if seen[n]
seen[n] = true
end
entries
end
def foreach path, &block
entries(path).each(&block)
end
# there are some other important ones, like:
# chroot (!), glob etc etc. for now, i think
def mkdir path
# as for rmdir below:
parent_path, basename = File.split @ole.file.expand_path(path)
# note that we will complain about the full path despite accessing
# the parent path. this is consistent with ::Dir
parent = dirent_from_path parent_path, path
# now, we first should ensure that it doesn't already exist
# either as a file or a directory.
raise Errno::EEXIST, path if parent/basename
parent.children << Dirent.new(@ole, :type => :dir, :name => basename)
0
end
def rmdir path
dirent = dirent_from_path path
raise Errno::ENOTEMPTY, path unless dirent.children.empty?
# now delete it, how to do that? the canonical representation that is
# maintained is the root tree, and the children array. we must remove it
# from the children array.
# we need the parent then. this sucks but anyway:
# we need to split the path. but before we can do that, we need
# to expand it first. eg. say we need the parent to unlink
# a/b/../c. the parent should be a, not a/b/.., or a/b.
parent_path, basename = File.split @ole.file.expand_path(path)
# this shouldn't be able to fail if the above didn't
parent = dirent_from_path parent_path
# note that the way this currently works, on save and repack time this will get
# reflected. to work properly, ie to make a difference now it would have to re-write
# the dirent. i think that Ole::Storage#close will handle that. and maybe include a
# #repack.
parent.children.delete dirent
0 # hmmm. as per ::Dir ?
end
alias delete :rmdir
alias unlink :rmdir
# note that there is nothing remotely ole specific about
# this class. it simply provides the dir like sequential access
# methods on top of an array.
# hmm, doesn't throw the IOError's on use of a closed directory...
class Dir
include Enumerable
attr_reader :path
def initialize path, entries
@path, @entries, @pos = path, entries, 0
@closed = false
end
def pos
raise IOError if @closed
@pos
end
def each(&block)
raise IOError if @closed
@entries.each(&block)
end
def close
@closed = true
end
def read
raise IOError if @closed
@entries[pos]
ensure
@pos += 1 if pos < @entries.length
end
def pos= pos
raise IOError if @closed
@pos = [[0, pos].max, @entries.length].min
end
def rewind
raise IOError if @closed
@pos = 0
end
alias tell :pos
alias seek :pos=
end
end
end
end

View File

@ -0,0 +1,148 @@
require 'ole/types/property_set'
module Ole
class Storage
#
# The MetaData class is designed to be high level interface to all the
# underlying meta data stored within different sections, themselves within
# different property set streams.
#
# With this class, you can simply get properties using their names, without
# needing to know about the underlying guids, property ids etc.
#
# Example:
#
# Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author }
#
# TODO:
#
# * add write support
# * fix some of the missing type coercion (eg FileTime)
# * maybe add back the ability to access individual property sets as a unit
# directly. ie <tt>ole.summary_information</tt>. Is this useful?
# * full key support, for unknown keys, like
# <tt>ole.meta_data[myguid, myid]</tt>. probably needed for user-defined
# properties too.
#
class MetaData
include Enumerable
FILE_MAP = {
Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation",
Types::PropertySet::FMTID_DocSummaryInfo => "\005DocumentSummaryInformation"
}
FORMAT_MAP = {
'MSWordDoc' => :doc
}
CLSID_EXCEL97 = Types::Clsid.parse "{00020820-0000-0000-c000-000000000046}"
CLSID_EXCEL95 = Types::Clsid.parse "{00020810-0000-0000-c000-000000000046}"
CLSID_WORD97 = Types::Clsid.parse "{00020906-0000-0000-c000-000000000046}"
CLSID_WORD95 = Types::Clsid.parse "{00020900-0000-0000-c000-000000000046}"
CLSID_MAP = {
CLSID_EXCEL97 => :xls,
CLSID_EXCEL95 => :xls,
CLSID_WORD97 => :doc,
CLSID_WORD95 => :doc
}
MIME_TYPES = {
:xls => 'application/vnd.ms-excel',
:doc => 'application/msword',
:ppt => 'application/vnd.ms-powerpoint',
# not registered at IANA, but seems most common usage
:msg => 'application/vnd.ms-outlook',
# this is my default fallback option. also not registered at IANA.
# file(1)'s default is application/msword, which is useless...
nil => 'application/x-ole-storage'
}
def initialize ole
@ole = ole
end
# i'm thinking of making file_format and mime_type available through
# #[], #each, and #to_h also, as calculated meta data (not assignable)
def comp_obj
return {} unless dirent = @ole.root["\001CompObj"]
data = dirent.read
# see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html
# compobj_version: 0x0001
# byte_order: 0xffe
# windows_version: 0x00000a03 (win31 apparently)
# marker: 0xffffffff
compobj_version, byte_order, windows_version, marker, clsid =
data.unpack("vvVVa#{Types::Clsid::SIZE}")
strings = []
i = 28
while i < data.length
len = data[i, 4].unpack('V').first
i += 4
strings << data[i, len - 1]
i += len
end
# in the unknown chunk, you usually see something like 'Word.Document.6'
{:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]}
end
private :comp_obj
def file_format
comp_obj[:file_format]
end
def mime_type
# based on the CompObj stream contents
type = FORMAT_MAP[file_format]
return MIME_TYPES[type] if type
# based on the root clsid
type = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)]
return MIME_TYPES[type] if type
# fallback to heuristics
has_file = Hash[*@ole.root.children.map { |d| [d.name.downcase, true] }.flatten]
return MIME_TYPES[:msg] if has_file['__nameid_version1.0'] or has_file['__properties_version1.0']
return MIME_TYPES[:doc] if has_file['worddocument'] or has_file['document']
return MIME_TYPES[:xls] if has_file['workbook'] or has_file['book']
MIME_TYPES[nil]
end
def [] key
pair = Types::PropertySet::PROPERTY_MAP[key.to_s] or return nil
file = FILE_MAP[pair.first] or return nil
dirent = @ole.root[file] or return nil
dirent.open { |io| return Types::PropertySet.new(io)[key] }
end
def []= key, value
raise NotImplementedError, 'meta data writes not implemented'
end
def each(&block)
FILE_MAP.values.each do |file|
dirent = @ole.root[file] or next
dirent.open { |io| Types::PropertySet.new(io).each(&block) }
end
end
def to_h
inject({}) { |hash, (name, value)| hash.update name.to_sym => value }
end
def method_missing name, *args, &block
return super unless args.empty?
pair = Types::PropertySet::PROPERTY_MAP[name.to_s] or return super
self[name]
end
end
def meta_data
@meta_data ||= MetaData.new(self)
end
end
end

View File

@ -1,40 +1,73 @@
#! /usr/bin/ruby
#
# A file with general support functions used by most files in the project.
#
# These are the only methods added to other classes.
#
require 'logger'
require 'stringio'
require 'enumerator'
class String # :nodoc:
	# plural of String#index. returns all offsets of +string+. rename to indices?
	#
	# note that it doesn't check for overlapping values.
	def indexes string
		offsets = []
		scan(/#{Regexp.quote string}/m) { offsets << $~.begin(0) }
		offsets
	end

	# yield successive slices of the string, each at most +size+ bytes long
	def each_chunk size
		offset = 0
		while offset < length
			yield self[offset, size]
			offset += size
		end
	end
end
class File # :nodoc:
	# for interface consistency with StringIO etc (rather than adding #stat
	# to them). used by RangesIO so any data source can report its length.
	def size
		stat.size
	end
end
class Symbol # :nodoc:
	# ruby >= 1.8.7 provides Symbol#to_proc natively; only backport it
	# where missing. (the previous text was merged old/new diff residue
	# with an unterminated duplicate definition.)
	unless :x.respond_to? :to_proc
		def to_proc
			proc { |a| a.send self }
		end
	end
end
module Enumerable # :nodoc:
	# backports guarded on feature detection; on modern rubies the native
	# implementations are left untouched. (the previous text was merged
	# old/new diff residue with unterminated duplicate definitions.)
	unless [].respond_to? :group_by
		# 1.9 backport
		def group_by
			hash = Hash.new { |h, key| h[key] = [] }
			each { |item| hash[yield(item)] << item }
			hash
		end
	end
	unless [].respond_to? :sum
		def sum initial=0
			inject(initial) { |a, b| a + b }
		end
	end
end
# move to support?
class IO # :nodoc:
	# Copy data from IO-like object +src+, to +dst+, in 4096 byte
	# chunks, until +src+ is exhausted.
	def self.copy src, dst
		dst.write src.read(4096) until src.eof?
	end
end
class Logger # :nodoc:
# A helper method for creating <tt>Logger</tt>s which produce call stack
# A helper method for creating a +Logger+ which produce call stack
# in their output
def self.new_with_callstack logdev=STDERR
log = Logger.new logdev
@ -48,4 +81,176 @@ class Logger # :nodoc:
end
log
end
end
end
# Include this module into a class that defines #each_child. It should
# maybe use #each instead, but its easier to be more specific, and use
# an alias.
#
# I don't want to force the class to cache children (eg where children
# are loaded on request in pst), because that forces the whole tree to
# be loaded. So, the methods should only call #each_child once, and
# breadth first iteration holds its own copy of the children around.
#
# Main methods are #recursive, and #to_tree
module RecursivelyEnumerable # :nodoc:
	# yield every descendant, depth first. the receiver itself is not
	# yielded here - see #each_recursive for that.
	def each_recursive_depth_first(&block)
		each_child do |child|
			yield child
			if child.respond_to? :each_recursive_depth_first
				child.each_recursive_depth_first(&block)
			end
		end
	end

	# don't think this is actually a proper breadth first recursion. only first
	# level is breadth first.
	def each_recursive_breadth_first(&block)
		children = []
		each_child do |child|
			children << child if child.respond_to? :each_recursive_breadth_first
			yield child
		end
		children.each { |child| child.each_recursive_breadth_first(&block) }
	end

	# yield self, then all descendants, using the traversal named by
	# +mode+ (:depth_first or :breadth_first).
	def each_recursive mode=:depth_first, &block
		# we always actually yield ourself (the tree root) before recursing
		yield self
		send "each_recursive_#{mode}", &block
	end

	# the idea of this function, is to allow use of regular Enumerable methods
	# in a recursive fashion. eg:
	#
	#   # just looks at top level children
	#   root.find { |child| child.some_condition? }
	#   # recurse into all children getting non-folders, breadth first
	#   root.recursive(:breadth_first).select { |child| !child.folder? }
	#   # just get everything
	#   items = root.recursive.to_a
	#
	def recursive mode=:depth_first
		to_enum(:each_recursive, mode)
	end

	# streams a "tree" form of the recursively enumerable structure to +io+, or
	# return a string form instead if +io+ is not specified.
	#
	# mostly a debugging aid. can specify a different block which will be called
	# to provide the string form for each node.
	def to_tree io='', &inspect
		inspect ||= :inspect.to_proc
		io << "- #{inspect[self]}\n"
		# +child+ lags one step behind the iteration, so the final child
		# can be drawn with a closing "\\-" connector instead of "|-".
		recurse = proc do |node, prefix|
			child = nil
			node.each_child do |next_child|
				if child
					io << "#{prefix}|- #{inspect[child]}\n"
					recurse.call child, prefix + '| '
				end
				child = next_child
			end if node.respond_to?(:each_child)
			if child
				io << "#{prefix}\\- #{inspect[child]}\n"
				recurse.call child, prefix + ' '
			end
		end
		recurse.call self, ' '
		io
	end
end
# can include File::Constants
class IO
	# this is for jruby
	include File::Constants unless defined?(RDONLY)

	# nabbed from rubinius, and modified. converts a mode string such as
	# 'r+' or 'wb' into the equivalent File::Constants flag bits.
	# raises ArgumentError for anything unrecognised.
	def self.parse_mode mode
		ret = 0

		case mode[0, 1]
		when 'r'; ret |= RDONLY
		when 'w'; ret |= WRONLY | CREAT | TRUNC
		when 'a'; ret |= WRONLY | CREAT | APPEND
		else raise ArgumentError, "illegal access mode #{mode}"
		end

		(1...mode.length).each do |i|
			case mode[i, 1]
			when '+'; ret = (ret & ~(RDONLY | WRONLY)) | RDWR
			when 'b'; ret |= Mode::BINARY
			else raise ArgumentError, "illegal access mode #{mode}"
			end
		end

		ret
	end

	# small value object wrapping a set of open-mode flag bits
	class Mode
		# ruby 1.9 defines binary as 0, which isn't very helpful.
		# its 4 in rubinius. no longer using
		#
		# BINARY = 0x4 unless defined?(BINARY)
		#
		# for that reason, have my own constants module here
		module Constants
			include File::Constants
			BINARY = 0x4
		end

		include Constants
		NAMES = %w[rdonly wronly rdwr creat trunc append binary]

		attr_reader :flags
		# +flags+ may be a mode string (anything responding to #to_str)
		# or the flag bits themselves.
		def initialize flags
			flags = IO.parse_mode flags.to_str if flags.respond_to? :to_str
			# FIX: use Integer rather than Fixnum here - Fixnum was removed in
			# ruby 3.0, and Integer covers the same values on all versions.
			raise ArgumentError, "invalid flags - #{flags.inspect}" unless Integer === flags
			@flags = flags
		end

		def writeable?
			#(@flags & RDONLY) == 0
			(@flags & 0x3) != RDONLY
		end

		def readable?
			(@flags & WRONLY) == 0
		end

		def truncate?
			(@flags & TRUNC) != 0
		end

		def append?
			(@flags & APPEND) != 0
		end

		def create?
			(@flags & CREAT) != 0
		end

		def binary?
			(@flags & BINARY) != 0
		end

=begin
		# revisit this
		def apply io
			if truncate?
				io.truncate 0
			elsif append?
				io.seek IO::SEEK_END, 0
			end
		end
=end

		def inspect
			names = NAMES.map { |name| name if (flags & Mode.const_get(name.upcase)) != 0 }
			# rdonly is 0, so it never survives the mask above - add it back
			# when no access bits are set
			names.unshift 'rdonly' if (flags & 0x3) == 0
			"#<#{self.class} #{names.compact * '|'}>"
		end
	end
end

View File

@ -1,27 +1,2 @@
require 'ole/base'
module Ole # :nodoc:
	# FIXME
	module Types
		# Parse two 32 bit time values into a DateTime
		# Time is stored as a high and low 32 bit value, comprising the
		# 100's of nanoseconds since 1st january 1601 (Epoch).
		# struct FILETIME. see eg http://msdn2.microsoft.com/en-us/library/ms724284.aspx
		def self.load_time str
			# FIX: FILETIME is little-endian on disk, so unpack with 'V2' rather
			# than the native-endian 'L2', which misparses on big-endian hosts.
			low, high = str.unpack 'V2'
			time = EPOCH + (high * (1 << 32) + low) * 1e-7 / 86400 rescue return
			# extra sanity check...
			unless (1800...2100) === time.year
				Log.warn "ignoring unlikely time value #{time.to_s}"
				return nil
			end
			time
		end

		# turn a binary guid into something displayable.
		# this will probably become a proper class later
		def self.load_guid str
			# FIX: guids are serialized little-endian - 'V v v' rather than the
			# native-endian 'L S S' (matches the newer Clsid::PACK format).
			"{%08x-%04x-%04x-%02x%02x-#{'%02x' * 6}}" % str.unpack('V v v CC C6')
		end
	end
end
require 'ole/types/base'
require 'ole/types/property_set'

251
lib/ole/types/base.rb Normal file
View File

@ -0,0 +1,251 @@
require 'iconv'
require 'date'
require 'ole/base'
module Ole # :nodoc:
#
# The Types module contains all the serialization and deserialization code for standard ole
# types.
#
# It also defines all the variant type constants, and symbolic names.
#
module Types
# for anything that we don't have serialization code for
class Data < String
def self.load str
new str
end
def self.dump str
str.to_s
end
end
class Lpstr < String
def self.load str
# not sure if its always there, but there is often a trailing
# null byte.
new str.chomp(0.chr)
end
def self.dump str
# do i need to append the null byte?
str.to_s
end
end
# for VT_LPWSTR
class Lpwstr < String
FROM_UTF16 = Iconv.new 'utf-8', 'utf-16le'
TO_UTF16 = Iconv.new 'utf-16le', 'utf-8'
def self.load str
new FROM_UTF16.iconv(str).chomp(0.chr)
end
def self.dump str
# need to append nulls?
data = TO_UTF16.iconv str
# not sure if this is the recommended way to do it, but I want to treat
# the resulting utf16 data as regular bytes, not characters.
data.force_encoding Encoding::US_ASCII if data.respond_to? :encoding
data
end
end
# for VT_FILETIME
class FileTime < DateTime
SIZE = 8
EPOCH = new 1601, 1, 1
# Create a +DateTime+ object from a struct +FILETIME+
# (http://msdn2.microsoft.com/en-us/library/ms724284.aspx).
#
# Converts +str+ to two 32 bit time values, comprising the high and low 32 bits of
# the 100's of nanoseconds since 1st january 1601 (Epoch).
def self.load str
low, high = str.to_s.unpack 'V2'
# we ignore these, without even warning about it
return nil if low == 0 and high == 0
# switched to rational, and fixed the off by 1 second error i sometimes got.
# time = EPOCH + (high * (1 << 32) + low) / 1e7 / 86400 rescue return
# use const_get to ensure we can return anything which subclasses this (VT_DATE?)
const_get('EPOCH') + Rational(high * (1 << 32) + low, 1e7.to_i * 86400) rescue return
# extra sanity check...
#unless (1800...2100) === time.year
# Log.warn "ignoring unlikely time value #{time.to_s}"
# return nil
#end
#time
end
# +time+ should be able to be either a Time, Date, or DateTime.
def self.dump time
# i think i'll convert whatever i get to be a datetime, because of
# the covered range.
return 0.chr * SIZE unless time
time = time.send(:to_datetime) if Time === time
# don't bother to use const_get here
bignum = (time - EPOCH) * 86400 * 1e7.to_i
high, low = bignum.divmod 1 << 32
[low, high].pack 'V2'
end
def inspect
"#<#{self.class} #{to_s}>"
end
end
# for VT_CLSID
# Unlike most of the other conversions, the Guid's are serialized/deserialized by actually
# doing nothing! (eg, _load & _dump are null ops)
# Rather, its just a string with a different inspect string, and it includes a
# helper method for creating a Guid from that readable form (#format).
class Clsid < String
SIZE = 16
PACK = 'V v v CC C6'
def self.load str
new str.to_s
end
def self.dump guid
return 0.chr * SIZE unless guid
# allow use of plain strings in place of guids.
guid['-'] ? parse(guid) : guid
end
def self.parse str
vals = str.scan(/[a-f\d]+/i).map(&:hex)
if vals.length == 5
# this is pretty ugly
vals[3] = ('%04x' % vals[3]).scan(/../).map(&:hex)
vals[4] = ('%012x' % vals[4]).scan(/../).map(&:hex)
guid = new vals.flatten.pack(PACK)
return guid if guid.format.delete('{}') == str.downcase.delete('{}')
end
raise ArgumentError, 'invalid guid - %p' % str
end
def format
"%08x-%04x-%04x-%02x%02x-#{'%02x' * 6}" % unpack(PACK)
end
def inspect
"#<#{self.class}:{#{format}}>"
end
end
#
# The OLE variant types, extracted from
# http://www.marin.clara.net/COM/variant_type_definitions.htm.
#
# A subset is also in WIN32OLE::VARIANT, but its not cross platform (obviously).
#
# Use like:
#
# p Ole::Types::Variant::NAMES[0x001f] => 'VT_LPWSTR'
# p Ole::Types::VT_DATE # => 7
#
# The serialization / deserialization functions should be fixed to make it easier
# to work with. like
#
# Ole::Types.from_str(VT_DATE, data) # and
# Ole::Types.to_str(VT_DATE, data)
#
# Or similar, rather than having to do VT_* <=> ad hoc class name etc as it is
# currently.
#
module Variant
NAMES = {
0x0000 => 'VT_EMPTY',
0x0001 => 'VT_NULL',
0x0002 => 'VT_I2',
0x0003 => 'VT_I4',
0x0004 => 'VT_R4',
0x0005 => 'VT_R8',
0x0006 => 'VT_CY',
0x0007 => 'VT_DATE',
0x0008 => 'VT_BSTR',
0x0009 => 'VT_DISPATCH',
0x000a => 'VT_ERROR',
0x000b => 'VT_BOOL',
0x000c => 'VT_VARIANT',
0x000d => 'VT_UNKNOWN',
0x000e => 'VT_DECIMAL',
0x0010 => 'VT_I1',
0x0011 => 'VT_UI1',
0x0012 => 'VT_UI2',
0x0013 => 'VT_UI4',
0x0014 => 'VT_I8',
0x0015 => 'VT_UI8',
0x0016 => 'VT_INT',
0x0017 => 'VT_UINT',
0x0018 => 'VT_VOID',
0x0019 => 'VT_HRESULT',
0x001a => 'VT_PTR',
0x001b => 'VT_SAFEARRAY',
0x001c => 'VT_CARRAY',
0x001d => 'VT_USERDEFINED',
0x001e => 'VT_LPSTR',
0x001f => 'VT_LPWSTR',
0x0040 => 'VT_FILETIME',
0x0041 => 'VT_BLOB',
0x0042 => 'VT_STREAM',
0x0043 => 'VT_STORAGE',
0x0044 => 'VT_STREAMED_OBJECT',
0x0045 => 'VT_STORED_OBJECT',
0x0046 => 'VT_BLOB_OBJECT',
0x0047 => 'VT_CF',
0x0048 => 'VT_CLSID',
0x0fff => 'VT_ILLEGALMASKED',
0x0fff => 'VT_TYPEMASK',
0x1000 => 'VT_VECTOR',
0x2000 => 'VT_ARRAY',
0x4000 => 'VT_BYREF',
0x8000 => 'VT_RESERVED',
0xffff => 'VT_ILLEGAL'
}
CLASS_MAP = {
# haven't seen one of these. wonder if its same as FILETIME?
#'VT_DATE' => ?,
'VT_LPSTR' => Lpstr,
'VT_LPWSTR' => Lpwstr,
'VT_FILETIME' => FileTime,
'VT_CLSID' => Clsid
}
module Constants
NAMES.each { |num, name| const_set name, num }
end
def self.load type, str
type = NAMES[type] or raise ArgumentError, 'unknown ole type - 0x%04x' % type
(CLASS_MAP[type] || Data).load str
end
def self.dump type, variant
type = NAMES[type] or raise ArgumentError, 'unknown ole type - 0x%04x' % type
(CLASS_MAP[type] || Data).dump variant
end
end
include Variant::Constants
# deprecated aliases, kept mostly for the benefit of ruby-msg, until
# i release a new version.
# Parse the serialized guid in +str+.
# Deprecated - call Variant.load with VT_CLSID directly in new code.
def self.load_guid str
	Variant.load Variant::Constants::VT_CLSID, str
end
# Parse the serialized timestamp in +str+.
# Deprecated - call Variant.load with VT_FILETIME directly in new code.
def self.load_time str
	Variant.load Variant::Constants::VT_FILETIME, str
end
# deprecated aliases for Lpwstr's string conversion codecs
# (presumably utf-16le converters - see Lpwstr; kept for ruby-msg).
FROM_UTF16 = Lpwstr::FROM_UTF16
TO_UTF16 = Lpwstr::TO_UTF16
end
end

View File

@ -0,0 +1,165 @@
require 'ole/types'
require 'yaml'
module Ole
module Types
#
# The PropertySet class currently supports readonly access to the properties
# serialized in "property set" streams, such as the file "\005SummaryInformation",
# in OLE files.
#
# Think it has its roots in MFC property set serialization.
#
# See http://poi.apache.org/hpsf/internals.html for details
#
class PropertySet
	# fixed-size stream header: signature, unknown, os id, guid, section count.
	HEADER_SIZE = 28
	HEADER_PACK = "vvVa#{Clsid::SIZE}V"
	# maps the serialized os id to a friendly symbol.
	OS_MAP = {
		0 => :win16,
		1 => :mac,
		2 => :win32,
		0x20001 => :ooffice, # open office on linux...
	}
	# define a smattering of the property set guids, loaded from the bundled
	# propids.yaml data file, keyed by parsed Clsid.
	DATA = YAML.load_file(File.dirname(__FILE__) + '/../../../data/propids.yaml').
		inject({}) { |hash, (key, value)| hash.update Clsid.parse(key) => value }
	# create an inverted map of names to guid/key pairs
	PROPERTY_MAP = DATA.inject({}) do |h1, (guid, data)|
		data[1].inject(h1) { |h2, (id, name)| h2.update name => [guid, id] }
	end
	# expose each property set name (eg FMTID_SummaryInformation) as a constant
	# holding its guid.
	module Constants
		DATA.each { |guid, (name, map)| const_set name, guid }
	end
	include Constants
	include Enumerable

	# A single section within the property set stream: a guid plus a table of
	# (property id, offset) pairs pointing at the serialized values.
	class Section
		include Variant::Constants
		include Enumerable

		# serialized size of a section list entry: guid + 4-byte offset.
		SIZE = Clsid::SIZE + 4
		PACK = "a#{Clsid::SIZE}v"

		attr_accessor :guid, :offset
		attr_reader :length

		# +str+ is one SIZE-byte section list entry; +property_set+ is the
		# owning PropertySet, whose io we read from.
		def initialize str, property_set
			@property_set = property_set
			@guid, @offset = str.unpack PACK
			self.guid = Clsid.load guid
			load_header
		end

		# the underlying stream is owned by the property set.
		def io
			@property_set.io
		end

		# read the section's own header: total byte size and property count.
		def load_header
			io.seek offset
			@byte_size, @length = io.read(8).unpack 'V2'
		end

		# look up a single property value by numeric id; nil when absent.
		def [] key
			each_raw do |id, property_offset|
				return read_property(property_offset).last if key == id
			end
			nil
		end

		# writes are not supported yet - the property set is readonly.
		def []= key, value
			raise NotImplementedError, 'section writes not yet implemented'
		end

		# yields [id, value] for each property in the section.
		def each
			each_raw do |id, property_offset|
				yield id, read_property(property_offset).last
			end
		end

	private

		# yields the raw (id, offset) pairs from the section's property table.
		# NOTE: each_chunk is a String extension added by ole/support.
		def each_raw
			io.seek offset + 8
			io.read(length * 8).each_chunk(8) { |str| yield(*str.unpack('V2')) }
		end

		# reads the [type, value] stored at +property_offset+ within the
		# section. only string types get decoded; other types are returned as
		# the raw 4-byte value.
		def read_property property_offset
			io.seek offset + property_offset
			type, value = io.read(8).unpack('V2')
			# is the method of serialization here custom?
			case type
			when VT_LPSTR, VT_LPWSTR
				value = Variant.load type, io.read(value)
			# ....
			end
			[type, value]
		end
	end

	attr_reader :io, :signature, :unknown, :os, :guid, :sections

	# +io+ is a seekable stream positioned at the start of a serialized
	# property set (eg the "\005SummaryInformation" stream of an OLE file).
	def initialize io
		@io = io
		load_header io.read(HEADER_SIZE)
		load_section_list io.read(@num_sections * Section::SIZE)
		# expect no gap between last section and start of data.
		#Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min
	end

	# unpack the stream header fields.
	def load_header str
		@signature, @unknown, @os_id, @guid, @num_sections = str.unpack HEADER_PACK
		# should i check that unknown == 0? it usually is. so is the guid actually
		@guid = Clsid.load @guid
		# FIX: the old code did `@os = OS_MAP[@os_id] || Log.warn(...)`, which
		# assigned Log.warn's return value (true) to @os for unknown ids.
		# leave @os nil when the id is unrecognised.
		@os = OS_MAP[@os_id]
		Log.warn "unknown operating system id #{@os_id}" unless @os
	end

	# parse the section list that follows the header.
	def load_section_list str
		@sections = str.to_enum(:each_chunk, Section::SIZE).map { |s| Section.new s, self }
	end

	# look up a property by symbolic name (eg :doc_author); nil when the name
	# is unknown or its section is not present in this stream.
	def [] key
		pair = PROPERTY_MAP[key.to_s] or return nil
		section = @sections.find { |s| s.guid == pair.first } or return nil
		section[pair.last]
	end

	# delegate writes to the owning section (currently raises
	# NotImplementedError there); silently returns nil for unknown names.
	def []= key, value
		pair = PROPERTY_MAP[key.to_s] or return nil
		section = @sections.find { |s| s.guid == pair.first } or return nil
		section[pair.last] = value
	end

	# provide getter/setter style access for every known property name,
	# eg +ps.doc_author+ or +ps.doc_author = 'x'+.
	def method_missing name, *args, &block
		if name.to_s =~ /(.*)=$/
			return super unless args.length == 1
			return super unless PROPERTY_MAP[$1]
			self[$1] = args.first
		else
			return super unless args.length == 0
			return super unless PROPERTY_MAP[name.to_s]
			self[name]
		end
	end

	# advertise the dynamic accessors provided by method_missing, so that
	# respond_to? answers truthfully for them.
	def respond_to_missing? name, include_all = false
		!!PROPERTY_MAP[name.to_s.chomp('=')] || super
	end

	# yields [name, value] for every property in every known section.
	def each
		@sections.each do |section|
			next unless pair = DATA[section.guid]
			map = pair.last
			section.each do |id, value|
				name = map[id] or next
				yield name, value
			end
		end
	end

	# all known properties as a symbol-keyed hash.
	def to_h
		inject({}) { |hash, (name, value)| hash.update name.to_sym => value }
	end
end
end
end