Aspose.Words for .NET如何替换或修改超链接
程序员文章站
2022-04-09 09:16:12
...
在当前版本,没有在Aspose.Words
嵌入功能来处理超链接字段。
超链接在Microsoft Word
文档是字段,一个字段包含字段代码和字段结果,在当前版本的Aspose.Words
中,没有单一的对象代表了一个字段。
以下这个例子展示了如何创建一个简单的类,它代表一个文档中的超链接。它的构造函数接受一个FieldStart
对象,这个对象必须有FieldType.FieldHyperlink
类型。使用超链接类后,您可以获取或设置跳转目标地址,名字和IsLocal
属性。现在很容易在整个文档改变超链接目标和名称。在这个例子中,所有的超链接都改为“http://aspose.com”。
Example
找到一个Word
文档所有超链接和改变他们的URL
和显示名称。
using System;
using System.Text;
using System.Text.RegularExpressions;
using Aspose.Words;
using Aspose.Words.Fields;
namespace Examples
{
/// <summary>
/// Shows how to replace hyperlinks in a Word document.
/// </summary>
public class ExReplaceHyperlinks : ExBase
{
/// <summary>
/// Finds all hyperlinks in a Word document and changes their URL and display name.
/// </summary>
public void ReplaceHyperlinks()
{
// Specify your document name here.
Document doc = new Document(MyDir + "ReplaceHyperlinks.doc");
// Hyperlinks in a Word documents are fields, select all field start nodes so we can find the hyperlinks.
NodeList fieldStarts = doc.SelectNodes("//FieldStart");
foreach (FieldStart fieldStart in fieldStarts)
{
if (fieldStart.FieldType.Equals(FieldType.FieldHyperlink))
{
// The field is a hyperlink field, use the "facade" class to help to deal with the field.
Hyperlink hyperlink = new Hyperlink(fieldStart);
// Some hyperlinks can be local (links to bookmarks inside the document), ignore these.
if (hyperlink.IsLocal)
continue;
// The Hyperlink class allows to set the target URL and the display name
// of the link easily by setting the properties.
hyperlink.Target = NewUrl;
hyperlink.Name = NewName;
}
}
doc.Save(MyDir + "ReplaceHyperlinks Out.doc");
}
private const string NewUrl = @"http://www.aspose.com";
private const string NewName = "Aspose - The .NET & Java Component Publisher";
}
/// <summary>
/// This "facade" class makes it easier to work with a hyperlink field in a Word document.
///
/// A hyperlink is represented by a HYPERLINK field in a Word document. A field in Aspose.Words
/// consists of several nodes and it might be difficult to work with all those nodes directly.
/// Note this is a simple implementation and will work only if the hyperlink code and name
/// each consist of one Run only.
///
/// [FieldStart][Run - field code][FieldSeparator][Run - field result][FieldEnd]
///
/// The field code contains a string in one of these formats:
/// HYPERLINK "url"
/// HYPERLINK \l "bookmark name"
///
/// The field result contains text that is displayed to the user.
/// </summary>
internal class Hyperlink
{
internal Hyperlink(FieldStart fieldStart)
{
if (fieldStart == null)
throw new ArgumentNullException("fieldStart");
if (!fieldStart.FieldType.Equals(FieldType.FieldHyperlink))
throw new ArgumentException("Field start type must be FieldHyperlink.");
mFieldStart = fieldStart;
// Find the field separator node.
mFieldSeparator = fieldStart.GetField().Separator;
if (mFieldSeparator == null)
throw new InvalidOperationException("Cannot find field separator.");
mFieldEnd = fieldStart.GetField().End;
// Field code looks something like [ HYPERLINK "http:\\www.myurl.com" ], but it can consist of several runs.
string fieldCode = fieldStart.GetField().GetFieldCode();
Match match = gRegex.Match(fieldCode.Trim());
mIsLocal = (match.Groups[1].Length > 0); //The link is local if \l is present in the field code.
mTarget = match.Groups[2].Value;
}
/// <summary>
/// Gets or sets the display name of the hyperlink.
/// </summary>
internal string Name
{
get
{
return GetTextSameParent(mFieldSeparator, mFieldEnd);
}
set
{
// Hyperlink display name is stored in the field result which is a Run
// node between field separator and field end.
Run fieldResult = (Run)mFieldSeparator.NextSibling;
fieldResult.Text = value;
// But sometimes the field result can consist of more than one run, delete these runs.
RemoveSameParent(fieldResult.NextSibling, mFieldEnd);
}
}
/// <summary>
/// Gets or sets the target url or bookmark name of the hyperlink.
/// </summary>
internal string Target
{
get
{
string dummy = null; // This is needed to fool the C# to VB.NET converter.
return mTarget;
}
set
{
mTarget = value;
UpdateFieldCode();
}
}
/// <summary>
/// True if the hyperlink's target is a bookmark inside the document. False if the hyperlink is a url.
/// </summary>
internal bool IsLocal
{
get
{
return mIsLocal;
}
set
{
mIsLocal = value;
UpdateFieldCode();
}
}
private void UpdateFieldCode()
{
// Field code is stored in a Run node between field start and field separator.
Run fieldCode = (Run)mFieldStart.NextSibling;
fieldCode.Text = string.Format("HYPERLINK {0}\"{1}\"", ((mIsLocal) ? "\\l " : ""), mTarget);
// But sometimes the field code can consist of more than one run, delete these runs.
RemoveSameParent(fieldCode.NextSibling, mFieldSeparator);
}
/// <summary>
/// Retrieves text from start up to but not including the end node.
/// </summary>
private static string GetTextSameParent(Node startNode, Node endNode)
{
if ((endNode != null) && (startNode.ParentNode != endNode.ParentNode))
throw new ArgumentException("Start and end nodes are expected to have the same parent.");
StringBuilder builder = new StringBuilder();
for (Node child = startNode; !child.Equals(endNode); child = child.NextSibling)
builder.Append(child.GetText());
return builder.ToString();
}
/// <summary>
/// Removes nodes from start up to but not including the end node.
/// Start and end are assumed to have the same parent.
/// </summary>
private static void RemoveSameParent(Node startNode, Node endNode)
{
if ((endNode != null) && (startNode.ParentNode != endNode.ParentNode))
throw new ArgumentException("Start and end nodes are expected to have the same parent.");
Node curChild = startNode;
while ((curChild != null) && (curChild != endNode))
{
Node nextChild = curChild.NextSibling;
curChild.Remove();
curChild = nextChild;
}
}
private readonly Node mFieldStart;
private readonly Node mFieldSeparator;
private readonly Node mFieldEnd;
private bool mIsLocal;
private string mTarget;
/// <summary>
/// RK I am notoriously bad at regexes. It seems I don't understand their way of thinking.
/// </summary>
private static readonly Regex gRegex = new Regex(
"\\S+" + // one or more non spaces HYPERLINK or other word in other languages
"\\s+" + // one or more spaces
"(?:\"\"\\s+)?" + // non capturing optional "" and one or more spaces, found in one of the customers files.
"(\\\\l\\s+)?" + // optional \l flag followed by one or more spaces
"\"" + // one apostrophe
);
}
}
下一篇: python3 函数参数总结