首页 > 编程 > Java > 正文

<java>office文件内容读取

2019-11-08 18:41:42
字体:
来源:转载
供稿:网友

实现对 Office 文件内容的读取。使用的 JDK 版本为 1.7,使用的 jar 包为 poi-3.9(下载链接)。

import org.apache.poi.POIxmlDocument;import org.apache.poi.POIXMLTextExtractor;import org.apache.poi.hssf.usermodel.HSSFRow;import org.apache.poi.hssf.usermodel.HSSFSheet;import org.apache.poi.hssf.usermodel.HSSFWorkbook;import org.apache.poi.hwpf.extractor.WordExtractor;import org.apache.poi.xslf.extractor.XSLFPowerpointExtractor;import org.apache.poi.xssf.usermodel.XSSFCell;import org.apache.poi.xssf.usermodel.XSSFRow;import org.apache.poi.xssf.usermodel.XSSFSheet;import org.apache.poi.xssf.usermodel.XSSFWorkbook;import org.apache.poi.xwpf.extractor.XWPFWordExtractor;import org.apache.poi.xwpf.usermodel.XWPFDocument;import java.io.BufferedReader;import java.io.File;import java.io.FileInputStream;import java.io.FileReader;/** * Created by leo01 on 17-1-27. */public class test { public static void main(String[] args) throws Exception { String targetDirectory = "/home/leo01/Desktop"; //获取文件夹位置 File file = new File(targetDirectory); //保存所有文件 String test[]; test = file.list(); //获取所有文件的文件名 for(int i = 0;i<test.length;i++) { String ss = ""; File f = new File(test[i]); String filename = f.getName(); //PRefix保存了文件后缀 String prefix = filename.substring(filename.lastIndexOf(".")+1); //fis2 为文件路径 String fis2 = targetDirectory+File.separator+test[i]; FileInputStream fis = new FileInputStream(fis2); //*.doc 文件读取 if("doc".equals(prefix)) { WordExtractor wordExtractor; try { FileInputStream docfile = new FileInputStream(fis2); wordExtractor = new WordExtractor(docfile); String[] paragraph = wordExtractor.getParagraphText(); for(int j=0;j<paragraph.length;j++) { //paragraph 里面存的是每个段落的文字 ss += paragraph[j].toString(); } System.out.println(ss); } catch (Exception e) { e.printStackTrace(); } } //*.docx 文件读取 /*else if("docx".equals(prefix)) { try { XWPFDocument xwpfDocument = new XWPFDocument(fis); POIXMLTextExtractor ex = new XWPFWordExtractor(xwpfDocument); //String.trim 为删除字符串前后空格 ss = ex.getText().trim(); System.out.println(ss); } catch (Exception e) { e.printStackTrace(); } }*/ 
// *.xls 文件读取 else if("xls".equals(prefix)) { HSSFWorkbook hssfWorkbook = new HSSFWorkbook(fis); //获得第一个工作表Sheet HSSFSheet hssfSheet = hssfWorkbook.getSheetAt(0); //获得第一行ROW HSSFRow hssfRow = hssfSheet.getRow(0); //用StringBuffer 得到 Excel 表格第一行的内容并用都好分隔 StringBuffer stringBuffer = new StringBuffer(); for(int j=0;j<hssfRow.getLastCellNum();j++) { stringBuffer.append(hssfRow.getCell(j)); int fc = hssfRow.getLastCellNum()-1; if(j != fc) { stringBuffer.append(","); } } System.out.println(stringBuffer); } //*.xlsx 文件读取 /*else if("xlsx".equals(prefix)) { XSSFWorkbook xssfWorkbook = new XSSFWorkbook(fis); //取得第一个工作表Sheet XSSFSheet xssfSheet = xssfWorkbook.getSheetAt(0); //取得第一行 XSSFRow xssfRow = xssfSheet.getRow(0); //循环列cell StringBuffer stringBuffer = new StringBuffer(); for(int j =0;j<xssfRow.getLastCellNum();j++) { XSSFCell xssfCell = xssfRow.getCell(j); stringBuffer.append(xssfCell); int fc = xssfRow.getLastCellNum()-1; if(j != fc) { stringBuffer.append(","); } } System.out.println(stringBuffer); }*/ //*.txt 文件读取 else if("txt".equals(prefix)) { BufferedReader reader; try { reader = new BufferedReader(new FileReader(fis2)); while(reader.ready()) { ss += reader.readLine(); } reader.close(); System.out.println(ss); } catch (Exception e) { e.printStackTrace(); } } } }}

对 docx 和 xlsx 文件读取时,总是出现 java.lang.NoClassDefFoundError 异常,还不知道为什么会出现这个异常。 异常


发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表