程序的跑批日志不断累积之后,文件有时会变得特别大。如果我们只需要获取文件的最后几行,就没有必要从头逐行遍历整个文件,而是可以从文件末尾向前按块读取。
自定义方法
import os


def file_get_last_lines(file_path, num):
    """Return the last ``num`` lines of a (possibly very large) file.

    The file is read backwards from its end in fixed-size blocks, so only the
    tail that is actually needed is loaded instead of scanning the whole file.

    :param file_path: path of the file to read; it is opened in binary mode.
    :param num: number of trailing lines wanted; coerced with ``int()``.
    :return: list of ``bytes`` objects in file order, split on the LF byte
        with the LF removed. A CR left over from CRLF line endings is kept,
        and callers must decode the bytes themselves. Fewer than ``num``
        items are returned when the file has fewer lines; a non-positive
        ``num`` yields an empty list.
    :raises OSError: if the file cannot be opened, or if a block comes back
        shorter than requested (for example the file shrank while reading).
    """
    num = int(num)
    blk_size_max = 4096
    blocks = []  # blocks in the order they were read, i.e. last block first
    newline_count = 0

    with open(file_path, 'rb') as fp:
        fp.seek(0, os.SEEK_END)
        cur_pos = fp.tell()
        # num + 1 newlines are required before stopping early: one of them
        # may be the file's trailing newline, and the piece in front of the
        # earliest newline seen may be only the tail of a longer line.
        while num > 0 and cur_pos > 0 and newline_count <= num:
            blk_size = min(blk_size_max, cur_pos)
            cur_pos -= blk_size
            fp.seek(cur_pos, os.SEEK_SET)
            blk_data = fp.read(blk_size)
            if len(blk_data) != blk_size:
                # An explicit check instead of ``assert``, which is stripped
                # when Python runs with -O.
                raise OSError(
                    'short read from %r: expected %d bytes, got %d'
                    % (file_path, blk_size, len(blk_data))
                )
            blocks.append(blk_data)
            newline_count += blk_data.count(b'\n')

    # Joining before splitting keeps lines that span several blocks intact
    # and avoids phantom empty lines when a block starts exactly on a newline.
    lines = b''.join(reversed(blocks)).split(b'\n')
    # A file ending with a newline yields one trailing empty piece that is
    # not a real line.
    if not lines[-1]:
        del lines[-1]
    return lines[-num:]
使用示例
# Usage example: fetch the last five lines of a CSV file and print them.
file_path = 'D:/python/PyCharmProjects/jianbao/Trans/20201130/SH603131.csv'
lines = file_get_last_lines(file_path, 5)

# Show the raw list of bytes objects first, followed by a separator.
print(lines)
print('--------------------------------')

# The function returns bytes, so each entry is decoded to str before printing.
for raw_line in lines:
    decoded = raw_line.decode('utf-8')
    print(decoded)
输出:
[b'140306,810,27.13,8123,3905856,S,6439959,5587158,5,1,14:05:09.589\r', b'140306,810,27.13,8677,3905857,S,6439959,5587159,5,1,14:05:09.590\r', b'140307,60,27.13,300,3905925,S,6440135,5587159,5,1,14:05:10.034\r', b'140402,470,27.13,500,3916594,S,6458210,5587159,5,1,14:06:05.042\r', b'140434,340,27.13,300,3922790,S,6468534,5587159,5,1,14:06:37.083\r']
--------------------------------
140306,810,27.13,8123,3905856,S,6439959,5587158,5,1,14:05:09.589
140306,810,27.13,8677,3905857,S,6439959,5587159,5,1,14:05:09.590
140307,60,27.13,300,3905925,S,6440135,5587159,5,1,14:05:10.034
140402,470,27.13,500,3916594,S,6458210,5587159,5,1,14:06:05.042
140434,340,27.13,300,3922790,S,6468534,5587159,5,1,14:06:37.083
参考: