This commit is contained in:
aixianling
2025-01-09 17:45:40 +08:00
commit 5c9f1dae4a
3482 changed files with 1146531 additions and 0 deletions

View File

@@ -0,0 +1,19 @@
; Symbols exported from the filter library, listed by name only (no ordinals or aliases).
EXPORTS
; Multi-byte (MBCS) filter entry points.
; NOTE(review): MemoryUsageOfMBCSFilter and SaveMBCSFilterWords have no declaration or
; definition in the MBCS header/source shown alongside this file - confirm they exist.
CreateMBCSFilter
FreeMBCSFilter
MemoryUsageOfMBCSFilter
LoadMBCSFilterWords
SaveMBCSFilterWords
MatchMBCSFilterWord
AddMBCSFilterStrToTable
; UCS filter entry points, mirroring the MBCS set name for name.
; NOTE(review): presumably declared in UCSDef.h - verify.
CreateUCSFilter
FreeUCSFilter
MemoryUsageOfUCSFilter
LoadUCSFilterWords
SaveUCSFilterWords
MatchUCSFilterWord
AddUCSFilterStrToTable

View File

@@ -0,0 +1,8 @@
// Umbrella header for the filter library: including it pulls in the MBCS filter API.
#ifndef _FASTFTMAIN_H_
#define _FASTFTMAIN_H_
#include "MBCSDef.h"
// The UCS filter header is deliberately left commented out here.
//#include "UCSDef.h"
#endif

View File

@@ -0,0 +1,69 @@
#--------------------------makefile description-----------------------------#
#Builds the fastFT static library with gcc: compile the sources, then archive the objects with ar.
#edited: miros
#edit time: Sep 8, 2007
#------------------------compiler and compiler flags------------------------#
COMPILER=gcc
#Alternative flag set that defines LINUX instead of FREEBSD:
#COMPLATEFLAGS=-g -D LINUX -D GUN_GCC -I ./../../common/
#NOTE(review): GUN_GCC looks like a misspelling of GNU_GCC - confirm what the sources test for before renaming.
COMPLATEFLAGS=-g -D FREEBSD -D GUN_GCC -I ./../../common/
#The "link" step archives the objects into a static library (ar flags: c = create, v = verbose, r = insert/replace).
LINKER=ar
LINKFLAGS=cvr
#DEBUGER is not referenced by any rule shown in this makefile.
DEBUGER=gdb
#--------------------------project settings---------------------------------#
#Output archive produced by the link step.
PROJECTNAME=../libs/bsd/dbg/fastFT.a
#Despite its name, INCLUDES holds the source files that are handed to the compiler.
INCLUDES=./MBCSFilter.c\
./UCSFilter.c
#LIBS=
LIBS=
#Object files expected from the compile step; archived by link and removed by cleanobjs.
OBJECTS=./MBCSFilter.o ./UCSFilter.o
#EXTEDOBJECTS=
EXTEDOBJECTS=
#------------------------------make actions---------------------------------#
#build is the first target in the file and therefore the default goal
build: complate link
#Compile step. $^ expands to this target's prerequisites; there are none, so the sources come from $(INCLUDES) alone.
complate:
@echo "compiler: ${COMPILER}"
@echo "begin complate source files:"
@echo "${INCLUDES}"
@$(COMPILER) $(COMPLATEFLAGS) $(INCLUDES) -c $^
@echo "complate succeeded"
#Remove the objects first, then compile again.
recomplate: cleanobjs complate
#Archive step: insert $(OBJECTS) into $(PROJECTNAME).
link:
@echo "linker: ${LINKER}"
@echo "begin link object files:"
@echo "${OBJECTS}"
@$(LINKER) $(LINKFLAGS) $(PROJECTNAME) $(OBJECTS)
@echo "Done!"
#The leading '-' makes make ignore rm failures (for example when the files are already gone).
cleanobjs:
@-rm $(OBJECTS)
cleanapps:
@-rm $(PROJECTNAME)
cleanall: cleanobjs cleanapps
rebuild: cleanall build
build-clear: build cleanobjs
all: rebuild cleanobjs
#-----------------------------phony target list------------------------------#
.PHONY: all build rebuild complate recomplate link cleanobjs cleanapps cleanall build-clear

View File

@@ -0,0 +1,74 @@
#ifndef _MBCSDEF_H_
#define _MBCSDEF_H_
// Number of child slots per node: one for every non-zero byte value (slot = byte value - 1).
#define MBCSARRAY_SIZE 255
// Selects the compact node layout (byte index table plus a growable child array)
// instead of a full 255-entry pointer table per node.
#define NEW_FILTER
#pragma pack(push, 1)
/*
 * One node of the MBCS filter-word trie. Each edge is a single byte of a word;
 * a node with wordEnd set marks the end of a complete filter word.
 * The struct is byte-packed, so its size is the plain sum of its members.
 */
typedef struct tagFT_mbcs_char
{
	char wordEnd; // non-zero when the bytes leading to this node form a complete filter word
#ifdef NEW_FILTER
	unsigned char idx[MBCSARRAY_SIZE]; // per slot: 0 = no child, otherwise 1-based position inside mem
	unsigned char count;               // number of child pointers currently stored in mem
	tagFT_mbcs_char** mem;             // child pointers, in the order they were added
#else
	struct tagFT_mbcs_char* next[MBCSARRAY_SIZE]; // child table, indexed by (byte value - 1)
#endif
#ifdef NEW_FILTER
	/*
	 * Return the child stored for slot `index`, or NULL when the slot is empty
	 * or `index` lies outside [0, MBCSARRAY_SIZE).
	 */
	tagFT_mbcs_char* getNext(int index)
	{
		// Reject out-of-range slots: idx[255] would alias the adjacent `count` byte.
		if (index < 0 || index >= MBCSARRAY_SIZE) return NULL;
		if (!mem) return NULL;
		unsigned char ridx = idx[index];
		if (ridx == 0) return NULL;
		return mem[ridx - 1];
	}
	// Register `next` as the child for slot `index` (defined out of line).
	void addNext(int index, tagFT_mbcs_char* next);
#else
	/*
	 * Return the child stored for slot `index`, or NULL when the slot is empty
	 * or `index` lies outside [0, MBCSARRAY_SIZE).
	 */
	tagFT_mbcs_char* getNext(int index)
	{
		if (index < 0 || index >= MBCSARRAY_SIZE) return NULL;
		return next[index];
	}
	// Store `next` as the child for slot `index`.
	void addNext(int index, tagFT_mbcs_char* next)
	{
		// The parameter hides the member array of the same name, so the member
		// has to be named through `this`.
		this->next[index] = next;
	}
#endif
}FT_MBCS_CHAR, *PFT_MBCS_CHAR;
#pragma pack(pop)
// The filter API is declared with C linkage when this header is compiled as C++.
#ifdef __cplusplus
extern "C"
{
#endif
// STDCALL becomes __stdcall when WIN32 or WINDOWS is defined and expands to nothing
// otherwise; an earlier definition of STDCALL is left untouched.
#ifndef STDCALL
#if (defined(WIN32) || defined(WINDOWS))
#define STDCALL __stdcall
#else
#define STDCALL
#endif
#endif
/*
Create an MBCS filter word table.
Returns a zero-filled node that serves as the root of the table. The pointer
argument is not used by the implementation. Unlike the other entry points this
one is declared without STDCALL.
*/
PFT_MBCS_CHAR CreateMBCSFilter(void*);
/*
Destroy an MBCS filter word table and release all memory used by the table.
The node and every node reachable from it are freed; a null filter is ignored.
*/
void STDCALL FreeMBCSFilter(PFT_MBCS_CHAR filter);
// Read filter words from the text file `fn`, one word per line, and add each non-empty
// line to `filter`. A leading UTF-8 BOM is skipped and trailing bytes <= 0x20 are
// stripped from every line. Returns the number of words that were added.
int STDCALL LoadMBCSFilterWords(PFT_MBCS_CHAR filter, const char *fn);
// Scan `input` for a filter word. Returns a pointer to the first position in `input`
// at which a word matches, or NULL when nothing matches. When `matchLen` is non-null
// it receives the length in bytes of the longest word matching at that position
// (0 when there is no match). ASCII letters a-z are compared case-insensitively.
char* STDCALL MatchMBCSFilterWord(const PFT_MBCS_CHAR filter, const char *input, int *matchLen);
// Add a string to the filter word table. Returns 0 when `filter` is null, 1 otherwise.
int STDCALL AddMBCSFilterStrToTable(PFT_MBCS_CHAR filter,const char* str);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -0,0 +1,174 @@
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "mbcs_def.h"
#include "memory/base_allocator.hpp"
// ASCII-only upper-casing: flips bit 0x20 for 'a'..'z' and yields every other value unchanged.
// This is a function-like macro, so the argument is evaluated more than once - pass only
// expressions without side effects.
#define toupper(c) (((c) >= 'a' && (c) <= 'z') ? (c) ^ 0x20 : (c))
// Allocator object through which this file allocates, grows and frees filter nodes and
// their child tables. NOTE(review): the string argument is presumably a label for the
// allocator - confirm against BaseAllocator.
BaseAllocator mbcsFilter("mbcsFilter");
#ifdef NEW_FILTER
/*
 * Append `next` to this node's child array and record it as the child for
 * slot `index`. The slot is expected to be empty (checked with assert).
 * idx stores the 1-based position inside mem, so 0 keeps meaning "no child".
 */
void tagFT_mbcs_char::addNext(int index, tagFT_mbcs_char* next)
{
	unsigned char ridx = idx[index];
	assert(ridx == 0);

	// Grow the child array by one pointer and place the new child in the last slot.
	const size_t slot = count;
	mem = (tagFT_mbcs_char**)mbcsFilter.ReAllocBuffer(mem, (slot + 1) * sizeof(tagFT_mbcs_char*));
	mem[slot] = next;

	count = (unsigned char)(slot + 1);
	idx[index] = count;
}
#endif
//把一个字符串加入到屏蔽词表中
/*
 * Add one word to the filter table rooted at `filter`.
 *
 * Every byte of `str` becomes one level of the trie; ASCII letters a-z are
 * folded to upper case first, so lookups are case-insensitive for them.
 * The node reached by the last byte is flagged as the end of a word.
 *
 * Returns 1 on success (an empty string adds nothing and also returns 1).
 * Returns 0 when `filter` or `str` is null, or when a node cannot be created;
 * nodes created before such a failure stay in the table but are not flagged
 * as a word end.
 */
int STDCALL AddMBCSFilterStrToTable(PFT_MBCS_CHAR filter, const char* str)
{
	// Validate both pointers before the string is read.
	if (!filter || !str)
		return 0;

	PFT_MBCS_CHAR node = filter;
	for (const char* p = str; *p; ++p)
	{
		// Bytes inside the string are never 0, so (byte - 1) always lands in 0..254.
		unsigned char index = (unsigned char)*p;
		index = toupper(index) - 1;

		PFT_MBCS_CHAR child = node->getNext(index);
		if (!child)
		{
			child = CreateMBCSFilter(0);
			if (!child)
				return 0;
			node->addNext(index, child);
		}
		node = child;
	}

	// `node` still equals the root only when the string was empty.
	if (node != filter)
		node->wordEnd = 1;
	return 1;
}
//-----------------------------------------------------------------------
/*
 * Allocate one filter node from the file's allocator and zero-fill it, so it
 * starts with no children and is not a word end. The pointer argument is unused.
 * Returns the new node, or a null pointer if the allocator returned none.
 */
PFT_MBCS_CHAR CreateMBCSFilter(void*)
{
	PFT_MBCS_CHAR pMBCSChar =
		(PFT_MBCS_CHAR)mbcsFilter.AllocBuffer(sizeof(FT_MBCS_CHAR));
	// Only clear memory that was actually handed out.
	if (pMBCSChar)
		memset(pMBCSChar, 0, sizeof(FT_MBCS_CHAR));
	return pMBCSChar;
}
/*
 * Release `filter` and, recursively, every node reachable from it.
 * A null `filter` is ignored.
 */
void STDCALL FreeMBCSFilter(PFT_MBCS_CHAR filter)
{
	if (filter == NULL)
		return;

#ifdef NEW_FILTER
	// Compact layout: the children are the first `count` entries of mem.
	const int childCount = filter->count;
	for (int slot = 0; slot < childCount; ++slot)
	{
		PFT_MBCS_CHAR child = filter->mem[slot];
		if (child != NULL)
			FreeMBCSFilter(child);
	}
	// The child array itself is a separate allocation.
	mbcsFilter.FreeBuffer(filter->mem);
#else
	// Full-table layout: visit every slot and free the ones that are occupied.
	for (int slot = 0; slot < MBCSARRAY_SIZE; ++slot)
	{
		PFT_MBCS_CHAR child = filter->next[slot];
		if (child != NULL)
			FreeMBCSFilter(child);
	}
#endif

	mbcsFilter.FreeBuffer(filter);
}
/*
 * Load filter words from the text file `fn` into `filter`, one word per line.
 *
 * A UTF-8 byte-order mark at the start of the file is skipped. Trailing bytes
 * with a value <= 0x20 (line breaks, spaces, control characters) are removed
 * from every line, and lines that end up empty are ignored.
 *
 * Returns the number of words added. Returns 0 when `fn` is null or the file
 * cannot be opened, which is indistinguishable from a file without words.
 *
 * Lines are read through a 256-byte buffer, so a line longer than 255 bytes is
 * consumed in several pieces and each piece is added as a separate word.
 */
int STDCALL LoadMBCSFilterWords(PFT_MBCS_CHAR filter, const char* fn)
{
	static const unsigned char utf8Bom[3] = { 0xEF, 0xBB, 0xBF };
	int nStrCount = 0;
	char szText[256] = {0};

	if (!fn)
		return 0;
	FILE* fl = fopen(fn, "r");
	if (!fl)
		return 0;

	// Detect and skip a UTF-8 BOM. The bytes are compared one by one so the
	// check does not depend on the host byte order; if the file is shorter
	// than three bytes or starts differently, reading restarts at offset 0.
	unsigned char head[3] = {0};
	if (fread(head, 1, sizeof(head), fl) != sizeof(head)
		|| memcmp(head, utf8Bom, sizeof(utf8Bom)) != 0)
	{
		fseek(fl, 0, SEEK_SET);
	}

	while (fgets(szText, sizeof(szText), fl))
	{
		// Trim from the right; the unsigned char cast keeps bytes >= 0x80
		// (multi-byte text) from being mistaken for whitespace.
		size_t nLen = strlen(szText);
		while (nLen > 0 && (unsigned char)szText[nLen - 1] <= 0x20)
			szText[--nLen] = 0;

		if (nLen > 0 && AddMBCSFilterStrToTable(filter, szText))
			++nStrCount;
	}

	fclose(fl);
	return nStrCount;
}
/*
 * Search `input` for a filter word.
 *
 * Positions are tried from left to right. At the first position where at least
 * one filter word matches, the longest word matching there is reported.
 * ASCII letters a-z in `input` are folded to upper case before each lookup.
 *
 * filter   - root of the filter table
 * input    - NUL-terminated text to scan
 * matchLen - optional; receives the matched length in bytes, or 0 without a match
 *
 * Returns a pointer into `input` at the start of the match, or NULL when no
 * word matches or when `filter` or `input` is null.
 *
 * The scan advances one byte at a time, so a match may begin at any byte of a
 * multi-byte character.
 */
char* STDCALL MatchMBCSFilterWord(const PFT_MBCS_CHAR filter, const char* input, int* matchLen)
{
	if (matchLen)
		*matchLen = 0;
	if (!filter || !input)
		return NULL;

	for (const char* start = input; *start; ++start)
	{
		const char* cursor = start;     // next byte to feed into the trie
		const char* longestEnd = NULL;  // one past the last byte of the longest word found at `start`
		PFT_MBCS_CHAR node = filter;

		// Walk down from the root for as long as the text keeps following an edge.
		while (*cursor)
		{
			unsigned char ch = (unsigned char)*cursor;
			ch = toupper(ch);
			node = node->getNext(ch - 1);
			if (!node)
				break;
			++cursor;
			// Remember every complete word on the way; later ones are longer.
			if (node->wordEnd)
				longestEnd = cursor;
		}

		// Stop at the first position that produced a complete word.
		if (longestEnd)
		{
			if (matchLen)
				*matchLen = (int)(longestEnd - start);
			return (char*)start;
		}
	}
	return NULL;
}