我今天尝试使用standford chinese segmenter进行中文句子分词,但是,将分词结果写到txt中的时候出现乱码,主要代码如下:
//声明
Properties props = new Properties();
props.setProperty("sighanCorporaDict", "data");
props.setProperty("serDictionary","data/dict-chris6.ser.gz");
props.setProperty("inputEncoding", "gb18030");
props.setProperty("sighanPostProcessing", "true");
CRFClassifier classifier = new CRFClassifier(props);
classifier.loadClassifierNoExceptions("data/ctb.gz", props);
classifier.flags.setProperties(props);
//结果写入
FileOutputStream fos = new FileOutputStream(“D:\\output.txt”);
ObjectOutputStream objectOutputStream = new ObjectOutputStream(fos);
List<String> result = classifier.segmentString(temp);
objectOutputStream.writeObject(result);
谢谢!
//声明
Properties props = new Properties();
props.setProperty("sighanCorporaDict", "data");
props.setProperty("serDictionary","data/dict-chris6.ser.gz");
props.setProperty("inputEncoding", "gb18030");
props.setProperty("sighanPostProcessing", "true");
CRFClassifier classifier = new CRFClassifier(props);
classifier.loadClassifierNoExceptions("data/ctb.gz", props);
classifier.flags.setProperties(props);
//结果写入
FileOutputStream fos = new FileOutputStream(“D:\\output.txt”);
ObjectOutputStream objectOutputStream = new ObjectOutputStream(fos);
List<String> result = classifier.segmentString(temp);
objectOutputStream.writeObject(result);
谢谢!