[Office 2010 開發 ] 利用 OpenXML 來了解您的 Word 文件的格式有哪些
在本文中您將會知道/學到:
- 如何依樣式擷取其內容
- 如何為 Open XML SDK 撰寫擴充方法 (Extension Methods)
而在本文中將會主要介紹下方幾項實作內容:
- 透過 Open XML SDK 開啟 Word 文件
- 找尋樣式的 ID 及其名稱。包括 段落、表格等
- 找到段落、表格後,可進一步判斷該項目是否套用了特定名稱的樣式
- 範例展示並呈現其結果
>> 這是範本檔案內容。 有 段落、表格、粗體等….
>> 標題部份為標題1
>> 顯示表格。
>> 這是針對上述 Word 文件的分析結果統計。
☆ 程式部份
◇ 請先建立一個 Console Application (主控台應用程式)
◇ 再把 Program.cs 檔更改成如下的程式碼
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

namespace QueryContentBasedOnStyles
{
    /// <summary>
    /// Console entry point that opens "input.docx" and prints how many
    /// paragraphs, runs and tables reference three given style names.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Opens the Word document, counts the elements that use each style
        /// and writes the totals to the console, then waits for a key press.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Style names (as shown in the Word UI / styles part) to look for.
            string paraStyle = "heading 1";
            string runStyle = "Intense Emphasis";
            string tableStyle = "Light List Accent 1";

            // Open the Word file. The document is only queried, never modified,
            // so it is opened read-only (isEditable: false). This avoids
            // requiring write access and avoids saving anything back on dispose.
            using (WordprocessingDocument myDoc =
                WordprocessingDocument.Open("input.docx", false))
            {
                // The main document part holds the body content.
                MainDocumentPart mainPart = myDoc.MainDocumentPart;

                // Report the count for each of the styles declared above.
                Console.WriteLine("Number of paragraphs with "
                    + paraStyle + " styles: "
                    + mainPart.ParagraphsByStyleName(paraStyle)
                        .Count());

                Console.WriteLine("Number of runs with "
                    + runStyle + " styles: "
                    + mainPart.RunsByStyleName(runStyle).Count());

                Console.WriteLine("Number of tables with "
                    + tableStyle + " styles: "
                    + mainPart.TablesByStyleName(tableStyle)
                        .Count());
            }

            // Keep the console window open until the user presses a key.
            Console.ReadKey();
        }
    }
}
◇ 新增一個 WordStyleExtensions.cs 檔做為本次的 Open XML SDK 的 Extension Methods (延伸方法),其程式碼如下:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

namespace QueryContentBasedOnStyles
{
    /// <summary>
    /// Extension methods on <see cref="MainDocumentPart"/> that select
    /// paragraphs, runs and tables by the name of the style they reference.
    /// </summary>
    /// <remarks>
    /// Only elements under <c>mainPart.Document</c> are searched, and only
    /// elements that carry an explicit style reference in their properties
    /// are matched.
    /// </remarks>
    public static class WordStyleExtensions
    {
        /// <summary>
        /// Resolves a style name to the style ID used by content elements.
        /// </summary>
        /// <param name="mainPart">Main document part whose styles part is consulted.</param>
        /// <param name="styleName">Style name to look up (ordinal, case-sensitive).</param>
        /// <returns>
        /// The ID of the first style whose name equals <paramref name="styleName"/>;
        /// otherwise <paramref name="styleName"/> itself, including when the
        /// document has no styles part.
        /// </returns>
        private static string GetStyleIdFromStyleName(
            MainDocumentPart mainPart, string styleName)
        {
            StyleDefinitionsPart stylePart = mainPart.StyleDefinitionsPart;

            // A document may have no styles part (or an empty one); there is
            // nothing to resolve then, so fall back to the given name.
            if (stylePart == null || stylePart.Styles == null)
            {
                return styleName;
            }

            foreach (StyleName name in stylePart.Styles.Descendants<StyleName>())
            {
                // Skip name elements that lack a value instead of throwing.
                if (name.Val == null || !IsSameStyleId(name.Val.Value, styleName))
                {
                    continue;
                }

                Style style = name.Parent as Style;
                if (style != null && style.StyleId != null && style.StyleId.Value != null)
                {
                    return style.StyleId.Value;
                }
            }

            return styleName;
        }

        /// <summary>
        /// Null-safe ordinal comparison of an element's style ID with the wanted ID.
        /// </summary>
        /// <param name="actualId">Style ID read from the element; may be null.</param>
        /// <param name="styleId">Style ID being searched for; may be null.</param>
        /// <returns>True only when both values are non-null and equal.</returns>
        private static bool IsSameStyleId(string actualId, string styleId)
        {
            return actualId != null
                && styleId != null
                && string.Equals(actualId, styleId, StringComparison.Ordinal);
        }

        /// <summary>
        /// Returns the paragraphs that reference the style with the given name.
        /// </summary>
        /// <param name="mainPart">Main document part to search.</param>
        /// <param name="styleName">Name of the paragraph style.</param>
        /// <returns>A lazily evaluated sequence of matching paragraphs.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mainPart"/> is null.</exception>
        public static IEnumerable<Paragraph> ParagraphsByStyleName(
            this MainDocumentPart mainPart, string styleName)
        {
            if (mainPart == null)
            {
                throw new ArgumentNullException("mainPart");
            }

            string styleId = GetStyleIdFromStyleName(mainPart, styleName);

            IEnumerable<Paragraph> paraList = mainPart.Document
                .Descendants<Paragraph>()
                .Where(p => IsParagraphInStyle(p, styleId));

            return paraList;
        }

        /// <summary>
        /// Determines whether a paragraph explicitly references the given style ID.
        /// </summary>
        /// <param name="p">Paragraph to inspect.</param>
        /// <param name="styleId">Style ID to compare against.</param>
        /// <returns>True when the paragraph's style ID equals <paramref name="styleId"/>.</returns>
        private static bool IsParagraphInStyle(Paragraph p,
            string styleId)
        {
            ParagraphProperties pPr = p.GetFirstChild<ParagraphProperties>();
            if (pPr == null)
            {
                return false;
            }

            ParagraphStyleId paraStyle = pPr.ParagraphStyleId;
            if (paraStyle == null || paraStyle.Val == null)
            {
                return false;
            }

            return IsSameStyleId(paraStyle.Val.Value, styleId);
        }

        /// <summary>
        /// Returns the runs that reference the style with the given name.
        /// </summary>
        /// <param name="mainPart">Main document part to search.</param>
        /// <param name="styleName">Name of the run (character) style.</param>
        /// <returns>A lazily evaluated sequence of matching runs.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mainPart"/> is null.</exception>
        public static IEnumerable<Run> RunsByStyleName(
            this MainDocumentPart mainPart, string styleName)
        {
            if (mainPart == null)
            {
                throw new ArgumentNullException("mainPart");
            }

            string styleId = GetStyleIdFromStyleName(mainPart, styleName);

            IEnumerable<Run> runList = mainPart.Document
                .Descendants<Run>()
                .Where(r => IsRunInStyle(r, styleId));

            return runList;
        }

        /// <summary>
        /// Determines whether a run explicitly references the given style ID.
        /// </summary>
        /// <param name="r">Run to inspect.</param>
        /// <param name="styleId">Style ID to compare against.</param>
        /// <returns>True when the run's style ID equals <paramref name="styleId"/>.</returns>
        private static bool IsRunInStyle(Run r, string styleId)
        {
            RunProperties rPr = r.GetFirstChild<RunProperties>();
            if (rPr == null)
            {
                return false;
            }

            RunStyle runStyle = rPr.RunStyle;
            if (runStyle == null || runStyle.Val == null)
            {
                return false;
            }

            return IsSameStyleId(runStyle.Val.Value, styleId);
        }

        /// <summary>
        /// Returns the tables that reference the style with the given name.
        /// </summary>
        /// <param name="mainPart">Main document part to search.</param>
        /// <param name="styleName">Name of the table style.</param>
        /// <returns>A lazily evaluated sequence of matching tables.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="mainPart"/> is null.</exception>
        public static IEnumerable<Table> TablesByStyleName(
            this MainDocumentPart mainPart, string styleName)
        {
            if (mainPart == null)
            {
                throw new ArgumentNullException("mainPart");
            }

            string styleId = GetStyleIdFromStyleName(mainPart, styleName);

            IEnumerable<Table> tableList = mainPart.Document
                .Descendants<Table>()
                .Where(t => IsTableInStyle(t, styleId));

            return tableList;
        }

        /// <summary>
        /// Determines whether a table explicitly references the given style ID.
        /// </summary>
        /// <param name="tbl">Table to inspect.</param>
        /// <param name="styleId">Style ID to compare against.</param>
        /// <returns>True when the table's style ID equals <paramref name="styleId"/>.</returns>
        private static bool IsTableInStyle(Table tbl, string styleId)
        {
            TableProperties tblPr = tbl.GetFirstChild<TableProperties>();
            if (tblPr == null)
            {
                return false;
            }

            TableStyle tblStyle = tblPr.TableStyle;
            if (tblStyle == null || tblStyle.Val == null)
            {
                return false;
            }

            return IsSameStyleId(tblStyle.Val.Value, styleId);
        }
    }
}
>> 檔案下載:點我下載
---> 本文預設於 2010.05.20 登入「Office/Sharepoint 開發組」
>> 參考翻譯及引用:Retrieving Word Content Based on Styles