本文共 2004 字,大约阅读时间需要 6 分钟。
将数据写入 Hadoop 分布式文件系统(HDFS)中的 TV 表
最近在项目中需要将大量数据写入 Hadoop 分布式文件系统(HDFS)上的 TV 表。本文将详细介绍该表的建表 SQL,以及按天生成并写入数据文件的 Java 代码实现。
TV表的SQL定义如下:
-- Table with two STRING columns: thedate and tv.
-- NOTE(review): the Java loader in this article writes its files under dt=<yyyyMMdd>
-- sub-directories of the table path; if those are meant to be partitions, this DDL
-- presumably also needs PARTITIONED BY (dt STRING) -- confirm against the real table.
CREATE TABLE tv (thedate STRING,tv STRING)
Java代码实现:
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Generates sample rows for the {@code tv} table and writes them to the Hadoop file system.
 *
 * <p>For every day of 2013 a directory {@code <table root>/dt=yyyyMMdd} is created and a
 * {@code data.txt} file with {@link #ROWS_PER_DAY} rows is written into it. Each row has the
 * form {@code <date><0x01><row index>} followed by a newline.
 */
public class ImportData {

    /** Root directory of the table; one {@code dt=yyyyMMdd} sub-directory is created per day. */
    private static final String TABLE_ROOT = "/group/tbdp-etao-adhoc/p4padhoc/tablelist/tv";

    /** Separator written between the two columns of a row (character code 1). */
    private static final char FIELD_DELIMITER = (char) 1;

    /**
     * Row terminator. A fixed {@code '\n'} is used instead of the platform line separator so the
     * file content does not depend on the operating system the loader runs on.
     */
    private static final char ROW_TERMINATOR = '\n';

    /** Number of rows written into each daily file. */
    private static final int ROWS_PER_DAY = 100000;

    private static Configuration conf = null;

    /** Shared file system handle; initialised in {@link #main} and used by the helper methods. */
    private static FileSystem fs = null;

    /**
     * Writes one data file per day for the whole year 2013.
     *
     * @param args unused
     * @throws IOException if any file system operation fails
     */
    public static void main(String[] args) throws IOException {
        conf = ConfigurationUtil.getConf();
        fs = FileSystem.get(conf);
        try {
            Calendar calendar = Calendar.getInstance();
            // Calendar months are zero-based: the original literal 1 meant February, which
            // silently skipped all of January. Use the named constant to start on 1 January.
            calendar.set(2013, Calendar.JANUARY, 1);
            SimpleDateFormat sf = new SimpleDateFormat("yyyyMMdd");

            while (calendar.get(Calendar.YEAR) < 2014) {
                String dateStr = sf.format(calendar.getTime());
                String path = TABLE_ROOT + "/dt=" + dateStr;
                mkdir(path);
                writeTVData(path, dateStr);
                calendar.add(Calendar.DAY_OF_YEAR, 1);
            }
        } finally {
            // Release the file system handle even when one of the writes fails.
            fs.close();
        }
    }

    /**
     * Writes {@code dir/data.txt} containing {@link #ROWS_PER_DAY} rows for the given date.
     *
     * <p>The file is created with {@code overwrite = false}, so the call fails if the file
     * already exists.
     *
     * @param dir     directory in which {@code data.txt} is created
     * @param dateStr date value written as the first column of every row
     * @throws IOException if the file cannot be created or written
     */
    public static void writeTVData(String dir, String dateStr) throws IOException {
        Path file = new Path(dir + "/data.txt");
        // try-with-resources closes the writer and the underlying stream on every path,
        // including when a write throws half-way through the file.
        try (FSDataOutputStream fos = fs.create(file, false);
             BufferedWriter bw = new BufferedWriter(
                     new OutputStreamWriter(fos, StandardCharsets.UTF_8))) {
            for (int i = 0; i < ROWS_PER_DAY; i++) {
                bw.write(dateStr);
                bw.write(FIELD_DELIMITER);
                bw.write(Integer.toString(i));
                bw.write(ROW_TERMINATOR);
            }
        }
    }

    /**
     * Creates the directory (including missing parents) if it does not exist yet.
     *
     * @param dir directory path to create
     * @throws IOException if the existence check fails or the directory cannot be created
     */
    public static void mkdir(String dir) throws IOException {
        Path p = new Path(dir);
        // Surface a failed creation instead of ignoring the boolean result.
        if (!fs.exists(p) && !fs.mkdirs(p)) {
            throw new IOException("Failed to create directory: " + dir);
        }
    }
}
转载地址:http://pmyfk.baihongyu.com/