import time import struct # dictionary to map device names to numeric values # (<255 means we can fit it into a 1-byte categorical variable) device_map = { "iphone":0, "android":1, "windows":2 } # write header output_file = open("example.dmp","w") output_file.write("name: example\n") output_file.write("encoding: binary\n") output_file.write("field: latitude,float\n") output_file.write("field: longitude,float\n") output_file.write("field: checkin_time,uint64\n") output_file.write("field: checkin_device,uint8\n") for (k,v) in device_map.iteritems(): output_file.write("valname: checkin_device," + str(v) + "," + str(k) +"\n") output_file.write("\n") # write records based on the csv lines input_file = open("example.csv","r") line_no = 0 while True: line_no += 1 line = input_file.readline().strip() if not line: break if line_no == 1: continue tokens = line.split(",") latitude = float(tokens[0]) longitude = float(tokens[1]) checkin_time = int(time.mktime(time.strptime(tokens[2],"%Y-%m-%dT%H:%M"))) checkin_device = int(device_map[tokens[3]]) # print latitude, longitude, checkin_time, checkin_device # little endian: 4 bytes float, 4 bytes float, 8 bytes unix time, 1 byte device data = struct.pack("