Source code for pylam.base.indexedfile

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os

class IndexedFile(object):
    """
    Generic class for pre-indexed file objects.

    On construction the file is scanned once and the byte offset of every
    line start is recorded, so that single lines or line ranges can later be
    fetched with one ``seek``/``read`` instead of re-reading the file.

    Lines are delimited by ``\\n``. The file is always accessed in binary
    mode so that the recorded offsets are true byte offsets; the bytes are
    decoded with *encoding* before being handed to the caller.

    :param filename: file name
    :type filename: str
    :param encoding: text encoding used to decode the file content
    :type encoding: str
    :return: indexed file object
    :rtype: .IndexedFile
    """

    # Class-level fallback so that subclasses which do not call
    # ``IndexedFile.__init__`` can still decode.
    _encoding = 'utf-8'

    def __init__(self, filename, encoding='utf-8'):
        self.filename = filename
        self._encoding = encoding
        # Size is captured once; it is used to compute the length of the
        # last line in _getOffsets().
        self._fileByteSize = os.stat(self.filename).st_size
        self._line_offset = []
        self._currentLineIndex = 0
        self._indexFile()

    def _decode(self, raw):
        """
        Converts data read in binary mode to the native ``str`` type.

        :param raw: data as read from the file
        :type raw: bytes
        :return: decoded text
        :rtype: str
        """
        if isinstance(raw, str):
            # Python 2: ``bytes`` is ``str`` already, pass it on unchanged.
            return raw
        return raw.decode(self._encoding)

    def _indexFile(self):
        """
        Generates the byte offset table.
        Method is called in :meth:`__init__`.
        """
        # Binary mode: len(raw) must be the number of *bytes* in the line,
        # otherwise multi-byte characters or translated newlines would
        # shift every following offset.
        with open(self.filename, 'rb') as f:
            offset = 0
            for no, raw in enumerate(f):
                self._parseLine(self._decode(raw), no)
                self._line_offset.append(offset)
                offset += len(raw)

    def _parseLine(self, line, no):
        """
        Method which will be called in :meth:`._indexFile` for each line of
        the file. The base implementation does nothing.

        :param line: line to parse
        :type line: str
        :param no: number of the line
        :type no: int
        """
        pass

    def _getOffsets(self, startLineIndex, endLineIndex):
        """
        Translates a line range into a byte range.

        For every line but the last one the returned length stops one byte
        before the start of the following line, i.e. the final newline of
        the range is excluded. If *endLineIndex* is the last line, the range
        extends to the end of the file (including a trailing newline if the
        file has one).

        :param startLineIndex: index of first line
        :type startLineIndex: int
        :param endLineIndex: index of last line (included!)
        :type endLineIndex: int
        :return: start byte offset and byte length
        :rtype: tuple(int, int)
        :raises IndexError: if one of the indices is out of range
        """
        startOffset = self._line_offset[startLineIndex]
        lineCount = len(self._line_offset)
        if endLineIndex >= lineCount:
            raise IndexError('invalid line index')
        elif endLineIndex == lineCount - 1:
            length = self._fileByteSize - startOffset
        else:
            length = self._line_offset[endLineIndex + 1] - startOffset - 1
        return startOffset, length

    def getLines(self, startLineIndex, endLineIndex):
        """
        Returns a part of the file as a string.

        :param startLineIndex: index of first line
        :type startLineIndex: int
        :param endLineIndex: index of last line (included!)
        :type endLineIndex: int
        :return: part of the file
        :rtype: str
        :raises IndexError: if one of the indices is out of range
        """
        startOffset, length = self._getOffsets(startLineIndex, endLineIndex)
        # Binary mode so that seek()/read() operate on the byte offsets
        # stored in the table.
        with open(self.filename, 'rb') as f:
            f.seek(startOffset)
            return self._decode(f.read(length))

    def getLine(self, lineIndex):
        """
        Returns the line with a given index (0,1,..) as a string.

        :param lineIndex: index of line in file
        :type lineIndex: int
        :return: line
        :rtype: str
        """
        return self.getLines(lineIndex, lineIndex)

    def __len__(self):
        """ Returns the total number of lines in the file
        (support for ``len()``). """
        return len(self._line_offset)

    def __getitem__(self, index):
        """ Returns the line with the given index (support for ``obj[i]``). """
        return self.getLine(index)

    def __iter__(self):
        """ The object is its own iterator; the read position is stored on
        the instance. """
        return self

    def __next__(self):
        """
        Returns the *next* line from the file.

        When the end is reached the internal position is reset to the first
        line before :class:`StopIteration` is raised, so the object can be
        iterated again.
        """
        if self._currentLineIndex < len(self._line_offset):
            line = self.getLine(self._currentLineIndex)
            self._currentLineIndex += 1
            return line
        self._currentLineIndex = 0
        raise StopIteration()

    # Python 2 iterator protocol and explicit ``obj.next()`` callers.
    next = __next__

    @property
    def fileLineSize(self):
        """ Number of lines in the file (same value as ``len()``). """
        return len(self._line_offset)