包含HTML标记的内容分页
包含HTML标记的内容分页
发布时间:2015-05-12 来源:查字典编辑
摘要:最近网站要做静态生成,内容必须分页,所以在网上找了N多实例,但都不理想,于是花了点时间自己写了个方法。目前来说没发现什么问题(已用该方法生成20W个ht...

 最近网站要做静态生成,内容必须分页,所以在网上找了N多实例,但都不理想,于是花了点时间自己写了个方法。

目前来说没发现什么问题(已用该方法生成20W个html)。

所以把代码贴出来与大家分享。

如有不足之处或更好的方法,请大家告知,我不胜感激。

包含HTML标记的内容分页1

调用方法:ArrayList arrlt = ContentPage.GetPageContent("分页内容", 分页大小, true);

1using System;
2using System.Text;
3using System.Collections;
4using System.Text.RegularExpressions;
5
/// <summary>
/// Splits a string of HTML into pages of roughly equal visible length.
/// Every page is kept well formed: tags that are still open at a page break are
/// closed at the end of that page and reopened at the start of the next one.
/// </summary>
/// <remarks>
/// A "tag" is everything from a '&lt;' up to the next '&gt;'. Visible length counts the
/// characters outside tags except space, CR and LF; "&amp;nbsp;" counts as zero and
/// any other character entity counts with its full written length.
/// </remarks>
public class ContentPage
{
    /// <summary>Entity that <see cref="RemoveHtml"/> strips; it contributes nothing to the visible length.</summary>
    private const string NonBreakingSpace = "&nbsp;";

    /// <summary>Longest run between '&amp;' and ';' that is still treated as one entity.</summary>
    private const int MaxEntityBodyLength = 32;

    /// <summary>Elements that never take a closing tag, so they are never closed or reopened.</summary>
    private static readonly string[] VoidElements = new string[]
    {
        "area", "base", "br", "col", "embed", "hr", "img", "input",
        "link", "meta", "param", "source", "track", "wbr"
    };

    /// <summary>
    /// Splits <paramref name="strContent"/> into pages.
    /// </summary>
    /// <param name="strContent">HTML to split. Null or empty yields an empty list.</param>
    /// <param name="intPageSize">
    /// Visible characters per page. A value of zero or less disables paging and the
    /// whole content is returned as a single page.
    /// </param>
    /// <param name="isOpen">
    /// When true, a remainder shorter than a quarter of <paramref name="intPageSize"/>
    /// is appended to the previous page instead of becoming a page of its own.
    /// </param>
    /// <returns>An <see cref="ArrayList"/> of strings, one per page, in order.</returns>
    public static ArrayList GetPageContent(string strContent, int intPageSize, bool isOpen)
    {
        ArrayList pages = new ArrayList();
        if (string.IsNullOrEmpty(strContent))
        {
            return pages;
        }

        if (intPageSize <= 0)
        {
            // No usable page size: hand the content back unsplit rather than dividing by zero.
            pages.Add(strContent);
            return pages;
        }

        string remaining = strContent;
        int visible = RemoveHtml(remaining).Length;

        // A remainder of up to 120% of a page is kept whole as the final page.
        int threshold = intPageSize + (intPageSize / 5);

        if (visible > 0)
        {
            do
            {
                pages.Add(SubString(intPageSize, ref remaining));
                visible = RemoveHtml(remaining).Length;

                if (isOpen && visible < (intPageSize / 4))
                {
                    // Too little left for a page of its own: fold it into the page just cut.
                    pages[pages.Count - 1] = (string)pages[pages.Count - 1] + remaining;
                    remaining = "";
                }
            }
            while (remaining.Length > 0 && visible > threshold);
        }

        if (remaining.Length > 0)
        {
            pages.Add(remaining);
        }

        return pages;
    }

    /// <summary>
    /// Tells whether <paramref name="cr"/> starts a tag.
    /// </summary>
    /// <param name="cr">Character to test.</param>
    /// <returns>True for '&lt;'.</returns>
    private static bool IsBegin(char cr)
    {
        return cr == '<';
    }

    /// <summary>
    /// Tells whether <paramref name="cr"/> ends a tag.
    /// </summary>
    /// <param name="cr">Character to test.</param>
    /// <returns>True for '&gt;'.</returns>
    private static bool IsEnd(char cr)
    {
        return cr == '>';
    }

    /// <summary>
    /// Cuts one page off the front of <paramref name="str"/>.
    /// </summary>
    /// <param name="index">Visible characters wanted on the page.</param>
    /// <param name="str">
    /// On entry the content still to be paged. On return the content left over, prefixed
    /// with the opening tags that were still open at the cut; empty when nothing is left.
    /// </param>
    /// <returns>The page, with closing tags appended for every tag left open at the cut.</returns>
    private static string SubString(int index, ref string str)
    {
        if (str.Length == 0)
        {
            return "";
        }

        // Take closing tags that follow the cut directly, so the next page does not
        // start with an element that is reopened only to be closed again.
        int cut = SkipClosingTags(str, Gindex(str, index));

        int lastRowEnd;
        ArrayList open = FindOpenTags(str.Substring(0, cut), out lastRowEnd);

        // Do not break inside a table row when an earlier row of this page ended:
        // back up to just after that row. A row longer than a page is split as usual.
        if (lastRowEnd >= 0 && HasOpenTag(open, "tr"))
        {
            cut = SkipClosingTags(str, lastRowEnd);
            open = FindOpenTags(str.Substring(0, cut), out lastRowEnd);
        }

        // A cut that lands in the text of e.g. <a><img />text</a> stays where it is:
        // closing and reopening the tags below keeps both halves well formed.
        StringBuilder page = new StringBuilder(str.Substring(0, cut));
        string rest = str.Substring(cut);

        if (rest.Trim().Length == 0)
        {
            // Only whitespace is left; keep it here instead of emitting an empty page.
            page.Append(rest);
            rest = "";
        }

        // Close innermost first.
        for (int i = open.Count - 1; i >= 0; i--)
        {
            page.Append("</").Append(TagName((string)open[i])).Append('>');
        }

        if (rest.Length == 0)
        {
            str = "";
        }
        else
        {
            // Reopen outermost first, with the original attributes.
            StringBuilder next = new StringBuilder();
            for (int i = 0; i < open.Count; i++)
            {
                next.Append((string)open[i]);
            }
            next.Append(rest);
            str = next.ToString();
        }

        return page.ToString();
    }

    /// <summary>
    /// Moves a cut position forward over closing tags that follow it directly.
    /// Whitespace is skipped only when a closing tag comes after it.
    /// </summary>
    /// <param name="str">Content being cut.</param>
    /// <param name="cut">Proposed cut position.</param>
    /// <returns>The cut position after the last closing tag consumed.</returns>
    private static int SkipClosingTags(string str, int cut)
    {
        int pos = cut;
        while (true)
        {
            int probe = pos;
            while (probe < str.Length && char.IsWhiteSpace(str[probe]))
            {
                probe++;
            }

            if (probe + 1 >= str.Length || str[probe] != '<' || str[probe + 1] != '/')
            {
                return pos;
            }

            int end = str.IndexOf('>', probe);
            if (end < 0)
            {
                return pos;
            }

            pos = end + 1;
        }
    }

    /// <summary>
    /// Lists the opening tags of <paramref name="html"/> that have no closing tag yet.
    /// </summary>
    /// <param name="html">Markup to scan.</param>
    /// <param name="lastRowEnd">
    /// Position just after the last closing <c>tr</c> tag found, or -1 when there is none.
    /// </param>
    /// <returns>The open tags as written in the markup, outermost first.</returns>
    private static ArrayList FindOpenTags(string html, out int lastRowEnd)
    {
        ArrayList open = new ArrayList();
        lastRowEnd = -1;

        int pos = 0;
        while (pos < html.Length)
        {
            int start = html.IndexOf('<', pos);
            if (start < 0)
            {
                break;
            }

            int end = html.IndexOf('>', start);
            if (end < 0)
            {
                break;
            }

            string tag = html.Substring(start, end - start + 1);
            pos = end + 1;

            string name = TagName(tag);
            if (name.Length == 0)
            {
                // Comment, doctype, processing instruction or a stray '<'.
                continue;
            }

            if (tag[1] == '/')
            {
                if (name == "tr")
                {
                    lastRowEnd = pos;
                }

                // A closing tag ends the nearest element of that name and anything
                // still open inside it; a closing tag without an opener is ignored.
                for (int i = open.Count - 1; i >= 0; i--)
                {
                    if (TagName((string)open[i]) == name)
                    {
                        open.RemoveRange(i, open.Count - i);
                        break;
                    }
                }
            }
            else if (tag[tag.Length - 2] != '/' && Array.IndexOf(VoidElements, name) < 0)
            {
                open.Add(tag);
            }
        }

        return open;
    }

    /// <summary>
    /// Tells whether an element named <paramref name="name"/> is among the open tags.
    /// </summary>
    /// <param name="open">Open tags as returned by <see cref="FindOpenTags"/>.</param>
    /// <param name="name">Lower-case element name.</param>
    /// <returns>True when at least one open tag has that name.</returns>
    private static bool HasOpenTag(ArrayList open, string name)
    {
        for (int i = 0; i < open.Count; i++)
        {
            if (TagName((string)open[i]) == name)
            {
                return true;
            }
        }

        return false;
    }

    /// <summary>
    /// Extracts the element name from an opening or closing tag.
    /// </summary>
    /// <param name="tag">Tag text including the angle brackets.</param>
    /// <returns>
    /// The lower-case name, or an empty string when the text after '&lt;' (or "&lt;/")
    /// does not start with a letter.
    /// </returns>
    private static string TagName(string tag)
    {
        int start = (tag.Length > 1 && tag[1] == '/') ? 2 : 1;
        int end = start;
        while (end < tag.Length
            && (char.IsLetterOrDigit(tag[end]) || tag[end] == '-' || tag[end] == '_' || tag[end] == ':'))
        {
            end++;
        }

        if (end == start || !char.IsLetter(tag[start]))
        {
            return "";
        }

        return tag.Substring(start, end - start).ToLowerInvariant();
    }

    /// <summary>
    /// Finds where to cut <paramref name="str"/> so that the part before the cut holds
    /// <paramref name="index"/> visible characters.
    /// </summary>
    /// <param name="str">Content to measure.</param>
    /// <param name="index">Number of visible characters wanted.</param>
    /// <returns>
    /// The cut position, or the length of <paramref name="str"/> when it holds fewer
    /// visible characters. The position never falls inside a tag or a character entity.
    /// </returns>
    private static int Gindex(string str, int index)
    {
        int count = 0;
        int pos = 0;
        bool inTag = false;

        while (pos < str.Length)
        {
            char cr = str[pos];

            if (inTag)
            {
                if (IsEnd(cr))
                {
                    inTag = false;
                }
                pos++;
                continue;
            }

            if (IsBegin(cr))
            {
                inTag = true;
                pos++;
                continue;
            }

            int entityLength = EntityLength(str, pos);
            if (entityLength > 0)
            {
                // An entity is consumed as a unit so a page never ends in the middle of one.
                bool isNbsp = entityLength == NonBreakingSpace.Length
                    && string.Compare(str, pos, NonBreakingSpace, 0, entityLength, StringComparison.OrdinalIgnoreCase) == 0;
                if (!isNbsp)
                {
                    count += entityLength;
                }
                pos += entityLength;
            }
            else
            {
                if (cr != '\n' && cr != '\r' && cr != ' ')
                {
                    count++;
                }
                pos++;
            }

            if (count >= index)
            {
                return pos;
            }
        }

        return str.Length;
    }

    /// <summary>
    /// Measures the character entity that starts at <paramref name="pos"/>.
    /// </summary>
    /// <param name="str">Content being scanned.</param>
    /// <param name="pos">Position to test.</param>
    /// <returns>
    /// The length of the entity including '&amp;' and ';', or 0 when the text at
    /// <paramref name="pos"/> is not an entity.
    /// </returns>
    private static int EntityLength(string str, int pos)
    {
        if (str[pos] != '&')
        {
            return 0;
        }

        int end = pos + 1;
        while (end < str.Length && end - pos <= MaxEntityBodyLength && IsEntityChar(str[end]))
        {
            end++;
        }

        bool terminated = end > pos + 1 && end < str.Length && str[end] == ';';
        return terminated ? end - pos + 1 : 0;
    }

    /// <summary>
    /// Tells whether <paramref name="cr"/> may appear between '&amp;' and ';' in an entity.
    /// </summary>
    /// <param name="cr">Character to test.</param>
    /// <returns>True for ASCII letters, ASCII digits and '#'.</returns>
    private static bool IsEntityChar(char cr)
    {
        return (cr >= 'a' && cr <= 'z')
            || (cr >= 'A' && cr <= 'Z')
            || (cr >= '0' && cr <= '9')
            || cr == '#';
    }

    /// <summary>
    /// Removes HTML tags and "&amp;nbsp;" entities from <paramref name="content"/>.
    /// </summary>
    /// <param name="content">Markup to strip.</param>
    /// <returns>The text with every "&lt;...&gt;" run and every "&amp;nbsp;" (any case) removed.</returns>
    public static string RemoveHtml(string content)
    {
        content = Regex.Replace(content, @"<[^>]*>", string.Empty, RegexOptions.IgnoreCase);
        return Regex.Replace(content, "&nbsp;", string.Empty, RegexOptions.IgnoreCase);
    }

    /// <summary>
    /// Returns part of a string, clamping the requested range to the string instead of throwing.
    /// </summary>
    /// <param name="str">Source string.</param>
    /// <param name="startIndex">
    /// Start position. A negative value shortens the range by that many characters and
    /// starts at 0.
    /// </param>
    /// <param name="length">
    /// Number of characters. A negative value selects the characters that end at
    /// <paramref name="startIndex"/> instead of those that start there.
    /// </param>
    /// <returns>
    /// The selected characters, or an empty string when the range lies outside the string.
    /// </returns>
    public static string CutString(string str, int startIndex, int length)
    {
        if (startIndex >= 0)
        {
            if (length < 0)
            {
                length = length * -1;
                if (startIndex - length < 0)
                {
                    length = startIndex;
                    startIndex = 0;
                }
                else
                {
                    startIndex = startIndex - length;
                }
            }

            if (startIndex > str.Length)
            {
                return "";
            }
        }
        else
        {
            if (length < 0 || length + startIndex <= 0)
            {
                return "";
            }

            length = length + startIndex;
            startIndex = 0;
        }

        if (str.Length - startIndex < length)
        {
            length = str.Length - startIndex;
        }

        // The range is now inside the string, so Substring cannot throw here.
        return str.Substring(startIndex, length);
    }
}
289
推荐文章
猜你喜欢
附近的人在看
推荐阅读
拓展阅读
相关阅读
网友关注
最新脚本HTML教程学习
热门脚本HTML教程学习
网页设计子分类