🐍 Python - Files as Recursive Objects
Updated at 2019-06-07 11:59
If you are dealing with a lot of files, it can make sense to build more abstractions on top of the raw data. But keep in mind how much CPU time and memory each of these levels of abstraction costs.
from typing import Optional, Union, List
class Text:
    """A piece of text that can be broken down into smaller ``Text`` objects.

    The text is taken either from a string (or another ``Text``) or read
    from a file.  On construction it is split three ways -- into
    paragraphs, lines and words -- and each piece can be retrieved as a
    new ``Text``, so lookups can be chained, e.g.
    ``text.paragraph(1).line(0).word(-1)``.

    Every access to ``paragraphs``, ``lines`` or ``words`` (and to the
    matching single-item methods) builds fresh ``Text`` objects, each of
    which splits its own data again.
    """

    # Raw text exactly as supplied / read from the file.
    _data: str
    # Pieces of the newline-trimmed text, computed once in __init__.
    _paragraphs: List[str]
    _lines: List[str]
    _words: List[str]

    def __init__(
        self,
        filename: Optional[str] = None,
        data: Optional[Union[str, 'Text']] = None,
    ) -> None:
        """Create a ``Text`` from ``data`` or from the file ``filename``.

        Args:
            filename: Path of a text file to read.  Only used when
                ``data`` is ``None``.
            data: The text itself, as a string or another ``Text``.
                Takes precedence over ``filename``.

        Raises:
            ValueError: If neither ``filename`` nor ``data`` is given.
        """
        if data is not None:
            self._data = str(data)
        elif filename is not None:
            # Context manager so the file handle is closed right away.
            with open(filename) as fh:
                self._data = fh.read()
        else:
            raise ValueError('filename or data is required')

        # Only leading/trailing newlines are trimmed; other surrounding
        # whitespace is kept for the paragraph and line splits.
        trimmed = self._data.strip('\n')
        # Paragraphs are separated by exactly one blank line.
        self._paragraphs = trimmed.split('\n\n')
        # An empty line still counts as a line.
        self._lines = trimmed.split('\n')
        # Words are separated by any run of whitespace.
        self._words = trimmed.split()

    def __repr__(self) -> str:
        """Return the raw text.

        No ``__str__`` is defined, so ``str(obj)`` yields this as well.
        """
        return self._data

    @property
    def paragraphs(self) -> List['Text']:
        """All paragraphs, each wrapped in a new ``Text``."""
        return [Text(data=p) for p in self._paragraphs]

    def paragraph(self, index: int) -> 'Text':
        """Return the paragraph at ``index`` (negative values count from the end)."""
        return Text(data=self._paragraphs[index])

    @property
    def lines(self) -> List['Text']:
        """All lines, including empty ones, each wrapped in a new ``Text``."""
        return [Text(data=line) for line in self._lines]

    def line(self, index: int) -> 'Text':
        """Return the line at ``index`` (negative values count from the end)."""
        return Text(data=self._lines[index])

    @property
    def words(self) -> List['Text']:
        """All whitespace-separated words, each wrapped in a new ``Text``."""
        return [Text(data=w) for w in self._words]

    def word(self, index: int) -> 'Text':
        """Return the word at ``index`` (negative values count from the end)."""
        return Text(data=self._words[index])
import tempfile
# Demo: write a small sample document to a temporary file, load it through
# Text, and check the paragraph / line / word lookups.
with tempfile.NamedTemporaryFile() as tf:
    # The blank lines matter: they separate the three paragraphs and are
    # counted as lines by the assertions below.
    tf.write(b'''
Evolution of Species

This is the first paragraph.
There can be multiple lines per paragraph.

This is the last line.
''')
    # Push the buffered bytes to disk before Text re-opens the file by name.
    tf.flush()
    text = Text(tf.name)

assert len(text.paragraphs) == 3
assert str(text.paragraph(0)) == 'Evolution of Species'
assert str(text.paragraph(1)).count('\n') == 1
assert str(text.paragraph(2)).count('\n') == 0

# note that an empty line is still a line
assert len(text.lines) == 6
assert str(text.line(0)) == 'Evolution of Species'
assert str(text.line(1)) == ''
assert str(text.line(2)) == 'This is the first paragraph.'
assert str(text.line(-1)) == 'This is the last line.'

assert len(text.words) == 20
assert str(text.word(0)) == 'Evolution'
assert str(text.word(-1)) == 'line.'

# this allows complex lookups, like the following:
# "second paragraph, second line, last word"
assert str(text.paragraph(1).line(1).word(-1)) == 'paragraph.'

# "second word of each paragraph"
assert [str(p.word(1)) for p in text.paragraphs] == ['of', 'is', 'is']