Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
nx-libs
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
1
Issues
1
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
dimbor
nx-libs
Commits
53d304ef
Unverified
Commit
53d304ef
authored
Nov 03, 2016
by
Mihai Moldovan
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'sunweaver-pr/drop-fbmmx' into arctica-3.6.x
Attributes GH PR #253:
https://github.com/ArcticaProject/nx-libs/pull/253
parents
d12b7754
f5c5cb10
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
3 additions
and
2780 deletions
+3
-2780
Imakefile
nx-X11/programs/Xserver/fb/Imakefile
+3
-22
fbcopy.c
nx-X11/programs/Xserver/fb/fbcopy.c
+0
-25
fbfill.c
nx-X11/programs/Xserver/fb/fbfill.c
+0
-6
fbmmx.c
nx-X11/programs/Xserver/fb/fbmmx.c
+0
-2276
fbmmx.h
nx-X11/programs/Xserver/fb/fbmmx.h
+0
-220
fbpict.c
nx-X11/programs/Xserver/fb/fbpict.c
+0
-231
No files found.
nx-X11/programs/Xserver/fb/Imakefile
View file @
53d304ef
/*
* The X.org 6.8.99.16 snapshot fails to compile with GCC 4.
* Temporarily disable the MMX features until the bug is
* fixed.
*
#if defined(HasGcc34) && HasGcc34
MMXOPTIONS= -mmmx -msse -Winline --param inline-unit-growth=10000 \
--param large-function-growth=10000 -DUSE_MMX
USEMMXOPTIONS= -DUSE_MMX
#if defined(i386Architecture) || defined(AMD64Architecture)
SpecialCObjectRule(fbmmx,fbmmx.c,$(MMXOPTIONS))
SpecialCObjectRule(fbpict,fbpict.c,$(USEMMXOPTIONS))
SpecialCObjectRule(fbfill,fbfill.c,$(USEMMXOPTIONS))
SpecialCObjectRule(fbcopy,fbcopy.c,$(USEMMXOPTIONS))
#endif
#endif
*/
NULL =
#include <Server.tmpl>
...
...
@@ -73,7 +55,7 @@ SRCS = $(XFMODSRC) \
fbwindow.c \
fb24_32.c \
fbpict.c \
fbmmx.c \
$(NULL)
OBJS = $(XFMODOBJ) \
fbarc.o \
...
...
@@ -109,7 +91,7 @@ OBJS = $(XFMODOBJ) \
fbwindow.o \
fb24_32.o \
fbpict.o \
fbmmx.o \
$(NULL)
INCLUDES = -I$(SERVERSRC)/fb -I$(SERVERSRC)/mi -I$(SERVERSRC)/include \
-I$(XINCLUDESRC) \
...
...
@@ -170,7 +152,6 @@ LinkSourceFile(fbtile.c,LinkDirectory)
LinkSourceFile(fbtrap.c,LinkDirectory)
LinkSourceFile(fbutil.c,LinkDirectory)
LinkSourceFile(fbwindow.c,LinkDirectory)
LinkSourceFile(fbmmx.c,LinkDirectory)
#endif
InstallDriverSDKLibraryModule(fb,$(DRIVERSDKMODULEDIR),.)
...
...
nx-X11/programs/Xserver/fb/fbcopy.c
View file @
53d304ef
...
...
@@ -30,7 +30,6 @@
#ifdef IN_MODULE
#include "xf86_ansic.h"
#endif
#include "fbmmx.h"
void
fbCopyNtoN
(
DrawablePtr
pSrcDrawable
,
...
...
@@ -61,27 +60,6 @@ fbCopyNtoN (DrawablePtr pSrcDrawable,
while
(
nbox
--
)
{
#ifdef USE_MMX
if
(
pm
==
FB_ALLONES
&&
alu
==
GXcopy
&&
!
reverse
&&
!
upsidedown
&&
fbHaveMMX
())
{
if
(
!
fbCopyAreammx
(
pSrcDrawable
,
pDstDrawable
,
(
pbox
->
x1
+
dx
),
(
pbox
->
y1
+
dy
),
(
pbox
->
x1
),
(
pbox
->
y1
),
(
pbox
->
x2
-
pbox
->
x1
),
(
pbox
->
y2
-
pbox
->
y1
)))
goto
fallback
;
else
goto
next
;
}
fallback:
#endif
fbBlt
(
src
+
(
pbox
->
y1
+
dy
+
srcYoff
)
*
srcStride
,
srcStride
,
(
pbox
->
x1
+
dx
+
srcXoff
)
*
srcBpp
,
...
...
@@ -99,9 +77,6 @@ fbCopyNtoN (DrawablePtr pSrcDrawable,
reverse
,
upsidedown
);
#ifdef USE_MMX
next:
#endif
pbox
++
;
}
}
...
...
nx-X11/programs/Xserver/fb/fbfill.c
View file @
53d304ef
...
...
@@ -27,7 +27,6 @@
#endif
#include "fb.h"
#include "fbmmx.h"
void
fbFill
(
DrawablePtr
pDrawable
,
...
...
@@ -47,11 +46,6 @@ fbFill (DrawablePtr pDrawable,
switch
(
pGC
->
fillStyle
)
{
case
FillSolid
:
#ifdef USE_MMX
if
(
!
pPriv
->
and
&&
fbHaveMMX
())
if
(
fbSolidFillmmx
(
pDrawable
,
x
,
y
,
width
,
height
,
pPriv
->
xor
))
return
;
#endif
fbSolid
(
dst
+
(
y
+
dstYoff
)
*
dstStride
,
dstStride
,
(
x
+
dstXoff
)
*
dstBpp
,
...
...
nx-X11/programs/Xserver/fb/fbmmx.c
deleted
100644 → 0
View file @
d12b7754
/*
* Copyright © 2004, 2005 Red Hat, Inc.
* Copyright © 2004 Nicholas Miell
* Copyright © 2005 Trolltech AS
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Red Hat not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. Red Hat makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Søren Sandmann (sandmann@redhat.com)
* Minor Improvements: Nicholas Miell (nmiell@gmail.com)
* MMX code paths for fbcompose.c by Lars Knoll (lars@trolltech.com)
*
* Based on work by Owen Taylor
*/
#ifdef HAVE_DIX_CONFIG_H
#include <dix-config.h>
#endif
#ifdef USE_MMX
#if defined(__amd64__) || defined(__x86_64__)
#define USE_SSE
#endif
#include <mmintrin.h>
#include <xmmintrin.h>
/* for _mm_shuffle_pi16 and _MM_SHUFFLE */
#ifdef RENDER
#include "fb.h"
#include "fbmmx.h"
#include "picturestr.h"
#include "mipict.h"
#include "fbpict.h"
#define noVERBOSE
#ifdef VERBOSE
#define CHECKPOINT() ErrorF ("at %s %d\n", __FUNCTION__, __LINE__)
#else
#define CHECKPOINT()
#endif
/* Notes about writing mmx code
*
* give memory operands as the second operand. If you give it as the
* first, gcc will first load it into a register, then use that
* register
*
* ie. use
*
* _mm_mullo_pi16 (x, mmx_constant);
*
* not
*
* _mm_mullo_pi16 (mmx_constant, x);
*
* Also try to minimize dependencies. i.e. when you need a value, try
* to calculate it from a value that was calculated as early as
* possible.
*/
/* --------------- MMX primitives ------------------------------------- */
/* 64-bit unsigned integer type; the MMX constant table below is built from it. */
typedef unsigned long long int ullong;
/*
 * Layout of the 64-bit constant table used by the MMX helpers.
 * Every member is one raw 64-bit value; the member name (after the
 * "mmx_" prefix) is what gets passed to the MC() accessor macro.
 */
typedef struct
{
    ullong mmx_4x00ff, mmx_4x0080;
    ullong mmx_565_rgb, mmx_565_unpack_multiplier;
    ullong mmx_565_r, mmx_565_g, mmx_565_b;
    ullong mmx_mask_0, mmx_mask_1, mmx_mask_2, mmx_mask_3;
    ullong mmx_full_alpha;
    ullong mmx_ffff0000ffff0000, mmx_0000ffff00000000, mmx_000000000000ffff;
} MMXData;
/*
 * The single read-only instance of the constant table.  Entries are
 * listed in the same order as the members of MMXData.
 */
static const MMXData c =
{
    /* per-16-bit-lane 0x00ff and 0x0080 */
    .mmx_4x00ff =                 0x00ff00ff00ff00ffULL,
    .mmx_4x0080 =                 0x0080008000800080ULL,

    /* r5g6b5 expansion mask and multiplier (see expand565) */
    .mmx_565_rgb =                0x000001f0003f001fULL,
    .mmx_565_unpack_multiplier =  0x0000008404100840ULL,

    /* channel selectors used by pack565 */
    .mmx_565_r =                  0x000000f800000000ULL,
    .mmx_565_g =                  0x0000000000fc0000ULL,
    .mmx_565_b =                  0x00000000000000f8ULL,

    /* mask_N clears 16-bit lane N and keeps the other three */
    .mmx_mask_0 =                 0xffffffffffff0000ULL,
    .mmx_mask_1 =                 0xffffffff0000ffffULL,
    .mmx_mask_2 =                 0xffff0000ffffffffULL,
    .mmx_mask_3 =                 0x0000ffffffffffffULL,

    /* 0x00ff in the top lane only */
    .mmx_full_alpha =             0x00ff000000000000ULL,

    /* lane selectors used by the non-SSE invert_colors */
    .mmx_ffff0000ffff0000 =       0xffff0000ffff0000ULL,
    .mmx_0000ffff00000000 =       0x0000ffff00000000ULL,
    .mmx_000000000000ffff =       0x000000000000ffffULL,
};
/* MC(name): read member mmx_<name> of the constant table `c`, cast to __m64. */
#define MC(x) ((__m64) c.mmx_##x)
/*
 * Shift the whole 64-bit value by a signed bit count:
 *   s > 0  -> logical left shift by s
 *   s < 0  -> logical right shift by -s
 *   s == 0 -> v is returned untouched
 */
static __inline__ __m64
shift (__m64 v, int s)
{
    if (s == 0)
	return v;

    return (s > 0) ? _mm_slli_si64 (v, s) : _mm_srli_si64 (v, -s);
}
static
__inline__
__m64
negate
(
__m64
mask
)
{
return
_mm_xor_si64
(
mask
,
MC
(
4
x00ff
));
}
static
__inline__
__m64
pix_multiply
(
__m64
a
,
__m64
b
)
{
__m64
res
;
res
=
_mm_mullo_pi16
(
a
,
b
);
res
=
_mm_adds_pu16
(
res
,
MC
(
4
x0080
));
res
=
_mm_adds_pu16
(
res
,
_mm_srli_pi16
(
res
,
8
));
res
=
_mm_srli_pi16
(
res
,
8
);
return
res
;
}
/* Byte-wise unsigned saturating sum of a and b. */
static __inline__ __m64
pix_add (__m64 a, __m64 b)
{
    __m64 sum = _mm_adds_pu8 (a, b);

    return sum;
}
#ifdef USE_SSE
/* USE_SSE variant: copy 16-bit lane 3 of pixel into all four lanes. */
static __inline__ __m64
expand_alpha (__m64 pixel)
{
    __m64 replicated;

    replicated = _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 3, 3, 3));
    return replicated;
}
/* USE_SSE variant: copy 16-bit lane 0 of pixel into all four lanes. */
static __inline__ __m64
expand_alpha_rev (__m64 pixel)
{
    __m64 replicated;

    replicated = _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (0, 0, 0, 0));
    return replicated;
}
/*
 * USE_SSE variant: exchange 16-bit lanes 0 and 2 of pixel; lanes 1 and 3
 * keep their positions.
 */
static __inline__ __m64
invert_colors (__m64 pixel)
{
    __m64 swapped;

    swapped = _mm_shuffle_pi16 (pixel, _MM_SHUFFLE (3, 0, 1, 2));
    return swapped;
}
#else
/*
 * Plain-MMX variant: replicate the top 16-bit lane of pixel into all
 * four lanes using shifts and ORs only.
 */
static __inline__ __m64
expand_alpha (__m64 pixel)
{
    /* top lane moved down to lane 0, upper 48 bits cleared */
    __m64 one = shift (pixel, -48);
    /* lanes 0 and 1 populated */
    __m64 two = _mm_or_si64 (one, shift (one, 16));

    /* duplicate the low pair into the high pair */
    return _mm_or_si64 (two, shift (two, 32));
}
/*
 * Plain-MMX variant: replicate the lowest 16-bit lane of pixel into all
 * four lanes using shifts and ORs only.
 */
static __inline__ __m64
expand_alpha_rev (__m64 pixel)
{
    /* shift up then back down: keeps lane 0, zeroes the upper 48 bits */
    __m64 one = shift (shift (pixel, 48), -48);
    /* lanes 0 and 1 populated */
    __m64 two = _mm_or_si64 (one, shift (one, 16));

    /* duplicate the low pair into the high pair */
    return _mm_or_si64 (two, shift (two, 32));
}
static
__inline__
__m64
invert_colors
(
__m64
pixel
)
{
__m64
x
,
y
,
z
;
x
=
y
=
z
=
pixel
;
x
=
_mm_and_si64
(
x
,
MC
(
ffff0000ffff0000
));
y
=
_mm_and_si64
(
y
,
MC
(
000000000000
ffff
));
z
=
_mm_and_si64
(
z
,
MC
(
0000
ffff00000000
));
y
=
shift
(
y
,
32
);
z
=
shift
(
z
,
-
32
);
x
=
_mm_or_si64
(
x
,
y
);
x
=
_mm_or_si64
(
x
,
z
);
return
x
;
}
#endif
/*
 * Returns src + dest * negate (srca), where the product goes through
 * pix_multiply and the sum is a byte-wise saturating add.
 */
static __inline__ __m64
over (__m64 src, __m64 srca, __m64 dest)
{
    __m64 inv_alpha = negate (srca);
    __m64 faded_dest = pix_multiply (dest, inv_alpha);

    return _mm_adds_pu8 (src, faded_dest);
}
/*
 * Composite src over dest after (a) swapping lanes 0 and 2 of src via
 * invert_colors and (b) multiplying the result by src's own alpha, with
 * the alpha lane's multiplier having MC(full_alpha) OR-ed in.
 */
static __inline__ __m64
over_rev_non_pre (__m64 src, __m64 dest)
{
    __m64 alpha = expand_alpha (src);
    __m64 alpha_with_full_top = _mm_or_si64 (alpha, MC (full_alpha));
    __m64 scaled = pix_multiply (invert_colors (src), alpha_with_full_top);

    return over (scaled, alpha, dest);
}
/* Returns src scaled lane-by-lane by mask (a thin wrapper over pix_multiply). */
static __inline__ __m64
in (__m64 src, __m64 mask)
{
    __m64 masked = pix_multiply (src, mask);

    return masked;
}
/*
 * Scale both src and srca by mask, then composite the scaled source
 * over dest using the scaled alpha.
 */
static __inline__ __m64
in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
{
    __m64 masked_src = in (src, mask);
    __m64 masked_alpha = pix_multiply (srca, mask);

    return over (masked_src, masked_alpha, dest);
}
/*
 * Widen the four bytes of v to four 16-bit lanes (byte n of v ends up
 * in lane n, zero-extended).
 */
static __inline__ __m64
load8888 (CARD32 v)
{
    __m64 packed = _mm_cvtsi32_si64 (v);

    return _mm_unpacklo_pi8 (packed, _mm_setzero_si64 ());
}
/*
 * Narrow two 4x16-bit values to one 8x8-bit value with unsigned
 * saturation; lo supplies the low four bytes, hi the high four.
 */
static __inline__ __m64
pack8888 (__m64 lo, __m64 hi)
{
    __m64 packed = _mm_packs_pu16 (lo, hi);

    return packed;
}
/*
 * Inverse of load8888: narrow the four 16-bit lanes of v (with unsigned
 * saturation) to bytes and return them as one 32-bit word.
 */
static __inline__ CARD32
store8888 (__m64 v)
{
    __m64 packed = pack8888 (v, _mm_setzero_si64 ());

    return _mm_cvtsi64_si32 (packed);
}
/* Expand 16 bits positioned at @pos (0-3) of a mmx register into
*
* 00RR00GG00BB
*
* --- Expanding 565 in the low word ---
*
* m = (m << (36 - 11)) | (m << (16 - 5)) | m;
* m = m & (01f0003f001f);
* m = m * (008404100840);
* m = m >> 8;
*
* Note the trick here - the top word is shifted by another nibble to
* avoid it bumping into the middle word
*/
static
__inline__
__m64
expand565
(
__m64
pixel
,
int
pos
)
{
__m64
p
=
pixel
;
__m64
t1
,
t2
;
/* move pixel to low 16 bit and zero the rest */
p
=
shift
(
shift
(
p
,
(
3
-
pos
)
*
16
),
-
48
);
t1
=
shift
(
p
,
36
-
11
);
t2
=
shift
(
p
,
16
-
5
);
p
=
_mm_or_si64
(
t1
,
p
);
p
=
_mm_or_si64
(
t2
,
p
);
p
=
_mm_and_si64
(
p
,
MC
(
565
_rgb
));
pixel
=
_mm_mullo_pi16
(
p
,
MC
(
565
_unpack_multiplier
));
return
_mm_srli_pi16
(
pixel
,
8
);
}
/*
 * Widen one 32-bit half of `in` to four 16-bit lanes: the low half
 * when pos == 0, the high half for any other pos.
 */
static __inline__ __m64
expand8888 (__m64 in, int pos)
{
    const __m64 zero = _mm_setzero_si64 ();

    return (pos == 0) ? _mm_unpacklo_pi8 (in, zero)
		      : _mm_unpackhi_pi8 (in, zero);
}
static
__inline__
__m64
pack565
(
__m64
pixel
,
__m64
target
,
int
pos
)
{
__m64
p
=
pixel
;
__m64
t
=
target
;
__m64
r
,
g
,
b
;
r
=
_mm_and_si64
(
p
,
MC
(
565
_r
));
g
=
_mm_and_si64
(
p
,
MC
(
565
_g
));
b
=
_mm_and_si64
(
p
,
MC
(
565
_b
));
r
=
shift
(
r
,
-
(
32
-
8
)
+
pos
*
16
);
g
=
shift
(
g
,
-
(
16
-
3
)
+
pos
*
16
);
b
=
shift
(
b
,
-
(
0
+
3
)
+
pos
*
16
);
if
(
pos
==
0
)
t
=
_mm_and_si64
(
t
,
MC
(
mask_0
));
else
if
(
pos
==
1
)
t
=
_mm_and_si64
(
t
,
MC
(
mask_1
));
else
if
(
pos
==
2
)
t
=
_mm_and_si64
(
t
,
MC
(
mask_2
));
else
if
(
pos
==
3
)
t
=
_mm_and_si64
(
t
,
MC
(
mask_3
));
p
=
_mm_or_si64
(
r
,
t
);
p
=
_mm_or_si64
(
g
,
p
);
return
_mm_or_si64
(
b
,
p
);
}
static
__inline__
__m64
pix_add_mul
(
__m64
x
,
__m64
a
,
__m64
y
,
__m64
b
)
{
x
=
_mm_mullo_pi16
(
x
,
a
);
y
=
_mm_mullo_pi16
(
y
,
b
);
x
=
_mm_srli_pi16
(
x
,
1
);
y
=
_mm_srli_pi16
(
y
,
1
);
x
=
_mm_adds_pu16
(
x
,
y
);
x
=
_mm_adds_pu16
(
x
,
_mm_srli_pi16
(
x
,
8
));
x
=
_mm_adds_pu16
(
x
,
MC
(
4
x0080
));
x
=
_mm_srli_pi16
(
x
,
7
);
return
x
;
}
/* --------------- MMX code paths for fbcompose.c --------------------- */
/*
 * For each of `width` pixels, scale src[i] in place by the alpha
 * (top byte) of mask[i].
 */
static FASTCALL void
mmxCombineMaskU (CARD32 *src, const CARD32 *mask, int width)
{
    const CARD32 *stop = mask + width;

    for (; mask < stop; ++src, ++mask)
    {
	__m64 coverage = expand_alpha (load8888 (*mask));
	__m64 pixel = load8888 (*src);

	*src = store8888 (pix_multiply (pixel, coverage));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = over (src[i], alpha (src[i]), dest[i]). */
static FASTCALL void
mmxCombineOverU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *stop = dest + width;

    for (; dest < stop; ++dest, ++src)
    {
	__m64 vs = load8888 (*src);
	__m64 vsa = expand_alpha (vs);

	*dest = store8888 (over (vs, vsa, load8888 (*dest)));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = over (dest[i], alpha (dest[i]), src[i]). */
static FASTCALL void
mmxCombineOverReverseU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *stop = dest + width;

    for (; dest < stop; ++dest, ++src)
    {
	__m64 vd = load8888 (*dest);
	__m64 vda = expand_alpha (vd);

	*dest = store8888 (over (vd, vda, load8888 (*src)));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = src[i] * alpha (dest[i]). */
static FASTCALL void
mmxCombineInU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *stop = dest + width;

    for (; dest < stop; ++dest, ++src)
    {
	__m64 vs = load8888 (*src);
	__m64 vda = expand_alpha (load8888 (*dest));

	*dest = store8888 (pix_multiply (vs, vda));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = dest[i] * alpha (src[i]). */
static FASTCALL void
mmxCombineInReverseU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *stop = dest + width;

    for (; dest < stop; ++dest, ++src)
    {
	__m64 vd = load8888 (*dest);
	__m64 vsa = expand_alpha (load8888 (*src));

	*dest = store8888 (pix_multiply (vd, vsa));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = src[i] * negate (alpha (dest[i])). */
static FASTCALL void
mmxCombineOutU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *stop = dest + width;

    for (; dest < stop; ++dest, ++src)
    {
	__m64 vs = load8888 (*src);
	__m64 inv_da = negate (expand_alpha (load8888 (*dest)));

	*dest = store8888 (pix_multiply (vs, inv_da));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = dest[i] * negate (alpha (src[i])). */
static FASTCALL void
mmxCombineOutReverseU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *stop = dest + width;

    for (; dest < stop; ++dest, ++src)
    {
	__m64 vd = load8888 (*dest);
	__m64 inv_sa = negate (expand_alpha (load8888 (*src)));

	*dest = store8888 (pix_multiply (vd, inv_sa));
    }
    _mm_empty ();
}
/*
 * For each of `width` pixels:
 *   dest[i] = pix_add_mul (src, alpha (dest), dest, negate (alpha (src))).
 */
static FASTCALL void
mmxCombineAtopU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *stop = dest + width;

    for (; dest < stop; ++dest, ++src)
    {
	__m64 vs = load8888 (*src);
	__m64 vd = load8888 (*dest);
	__m64 inv_sa = negate (expand_alpha (vs));
	__m64 vda = expand_alpha (vd);

	*dest = store8888 (pix_add_mul (vs, vda, vd, inv_sa));
    }
    _mm_empty ();
}
/*
 * For each of `width` pixels:
 *   dest[i] = pix_add_mul (src, negate (alpha (dest)), dest, alpha (src)).
 */
static FASTCALL void
mmxCombineAtopReverseU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *stop = dest + width;

    for (; dest < stop; ++dest, ++src)
    {
	__m64 vs = load8888 (*src);
	__m64 vd = load8888 (*dest);
	__m64 vsa = expand_alpha (vs);
	__m64 inv_da = negate (expand_alpha (vd));

	*dest = store8888 (pix_add_mul (vs, inv_da, vd, vsa));
    }
    _mm_empty ();
}
/*
 * For each of `width` pixels:
 *   dest[i] = pix_add_mul (src, negate (alpha (dest)),
 *                          dest, negate (alpha (src))).
 */
static FASTCALL void
mmxCombineXorU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *stop = dest + width;

    for (; dest < stop; ++dest, ++src)
    {
	__m64 vs = load8888 (*src);
	__m64 vd = load8888 (*dest);
	__m64 inv_sa = negate (expand_alpha (vs));
	__m64 inv_da = negate (expand_alpha (vd));

	*dest = store8888 (pix_add_mul (vs, inv_da, vd, inv_sa));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = saturating sum of src[i] and dest[i]. */
static FASTCALL void
mmxCombineAddU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *stop = dest + width;

    for (; dest < stop; ++dest, ++src)
    {
	__m64 vs = load8888 (*src);
	__m64 vd = load8888 (*dest);

	*dest = store8888 (pix_add (vs, vd));
    }
    _mm_empty ();
}
/*
 * Saturating add of src onto dest, `width` pixels.
 *
 * For every pixel, sa is the source alpha (top byte of src) and da is
 * the top byte of ~dest.  When sa exceeds da the source is first scaled
 * by FbIntDiv (da, sa) so that adding it cannot push the destination
 * alpha past full; otherwise the source is added unscaled.
 *
 * Fix: the scale factor used to be loaded into the LOW byte of the
 * word, but expand_alpha () replicates the TOP byte (bits 24-31) of a
 * load8888 ()-ed word.  The replicated factor was therefore always
 * zero and the source contributed nothing whenever sa > da.  The
 * quotient is now moved into the alpha byte before it is loaded.
 */
static FASTCALL void
mmxCombineSaturateU (CARD32 *dest, const CARD32 *src, int width)
{
    const CARD32 *end = dest + width;

    while (dest < end)
    {
	CARD32 s = *src;
	CARD32 d = *dest;
	__m64 ms = load8888 (s);
	__m64 md = load8888 (d);
	CARD32 sa = s >> 24;
	CARD32 da = ~d >> 24;

	if (sa > da)
	{
	    /*
	     * NOTE(review): assumes FbIntDiv yields an 8-bit ratio
	     * (0..255) -- confirm against its definition.  The cast
	     * keeps the shift in unsigned arithmetic.
	     */
	    CARD32 ratio = (CARD32) FbIntDiv (da, sa) << 24;
	    __m64 msa = expand_alpha (load8888 (ratio));

	    ms = pix_multiply (ms, msa);
	}

	md = pix_add (md, ms);
	*dest = store8888 (md);

	++src;
	++dest;
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = src[i] * mask[i] (per channel). */
static FASTCALL void
mmxCombineSrcC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++mask, ++dest)
    {
	__m64 vm = load8888 (*mask);
	__m64 vs = load8888 (*src);

	*dest = store8888 (pix_multiply (vs, vm));
    }
    _mm_empty ();
}
/*
 * For each of `width` pixels:
 *   dest[i] = in_over (src, alpha (src), mask, dest).
 */
static FASTCALL void
mmxCombineOverC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++dest, ++mask)
    {
	__m64 vm = load8888 (*mask);
	__m64 vs = load8888 (*src);
	__m64 vd = load8888 (*dest);
	__m64 vsa = expand_alpha (vs);

	*dest = store8888 (in_over (vs, vsa, vm, vd));
    }
    _mm_empty ();
}
/*
 * For each of `width` pixels:
 *   dest[i] = over (dest, alpha (dest), in (src, mask)).
 */
static FASTCALL void
mmxCombineOverReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++dest, ++mask)
    {
	__m64 vm = load8888 (*mask);
	__m64 vs = load8888 (*src);
	__m64 vd = load8888 (*dest);
	__m64 vda = expand_alpha (vd);

	*dest = store8888 (over (vd, vda, in (vs, vm)));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = (src * mask) * alpha (dest). */
static FASTCALL void
mmxCombineInC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++dest, ++mask)
    {
	__m64 vm = load8888 (*mask);
	__m64 vs = load8888 (*src);
	__m64 vda = expand_alpha (load8888 (*dest));
	__m64 masked = pix_multiply (vs, vm);

	*dest = store8888 (pix_multiply (masked, vda));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = dest * (mask * alpha (src)). */
static FASTCALL void
mmxCombineInReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++dest, ++mask)
    {
	__m64 vm = load8888 (*mask);
	__m64 vsa = expand_alpha (load8888 (*src));
	__m64 vd = load8888 (*dest);
	__m64 factor = pix_multiply (vm, vsa);

	*dest = store8888 (pix_multiply (vd, factor));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = (src * mask) * negate (alpha (dest)). */
static FASTCALL void
mmxCombineOutC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++dest, ++mask)
    {
	__m64 vm = load8888 (*mask);
	__m64 vs = load8888 (*src);
	__m64 inv_da = negate (expand_alpha (load8888 (*dest)));
	__m64 masked = pix_multiply (vs, vm);

	*dest = store8888 (pix_multiply (masked, inv_da));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = dest * negate (mask * alpha (src)). */
static FASTCALL void
mmxCombineOutReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++dest, ++mask)
    {
	__m64 vm = load8888 (*mask);
	__m64 vsa = expand_alpha (load8888 (*src));
	__m64 vd = load8888 (*dest);
	__m64 factor = negate (pix_multiply (vm, vsa));

	*dest = store8888 (pix_multiply (vd, factor));
    }
    _mm_empty ();
}
/*
 * For each of `width` pixels:
 *   dest[i] = pix_add_mul (dest, negate (mask * alpha (src)),
 *                          src * mask, alpha (dest)).
 */
static FASTCALL void
mmxCombineAtopC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++dest, ++mask)
    {
	__m64 vm = load8888 (*mask);
	__m64 vs = load8888 (*src);
	__m64 vd = load8888 (*dest);
	__m64 vda = expand_alpha (vd);
	__m64 vsa = expand_alpha (vs);
	__m64 masked_src = pix_multiply (vs, vm);
	__m64 inv_masked_alpha = negate (pix_multiply (vm, vsa));

	*dest = store8888 (pix_add_mul (vd, inv_masked_alpha, masked_src, vda));
    }
    _mm_empty ();
}
/*
 * For each of `width` pixels:
 *   dest[i] = pix_add_mul (dest, mask * alpha (src),
 *                          src * mask, negate (alpha (dest))).
 */
static FASTCALL void
mmxCombineAtopReverseC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++dest, ++mask)
    {
	__m64 vm = load8888 (*mask);
	__m64 vs = load8888 (*src);
	__m64 vd = load8888 (*dest);
	__m64 inv_da = negate (expand_alpha (vd));
	__m64 vsa = expand_alpha (vs);
	__m64 masked_src = pix_multiply (vs, vm);
	__m64 masked_alpha = pix_multiply (vm, vsa);

	*dest = store8888 (pix_add_mul (vd, masked_alpha, masked_src, inv_da));
    }
    _mm_empty ();
}
/*
 * For each of `width` pixels:
 *   dest[i] = pix_add_mul (dest, negate (mask * alpha (src)),
 *                          src * mask, negate (alpha (dest))).
 */
static FASTCALL void
mmxCombineXorC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++dest, ++mask)
    {
	__m64 vm = load8888 (*mask);
	__m64 vs = load8888 (*src);
	__m64 vd = load8888 (*dest);
	__m64 inv_da = negate (expand_alpha (vd));
	__m64 vsa = expand_alpha (vs);
	__m64 masked_src = pix_multiply (vs, vm);
	__m64 inv_masked_alpha = negate (pix_multiply (vm, vsa));

	*dest = store8888 (pix_add_mul (vd, inv_masked_alpha, masked_src, inv_da));
    }
    _mm_empty ();
}
/* For each of `width` pixels: dest[i] = saturating sum of (src * mask) and dest. */
static FASTCALL void
mmxCombineAddC (CARD32 *dest, CARD32 *src, CARD32 *mask, int width)
{
    const CARD32 *stop = src + width;

    for (; src < stop; ++src, ++dest, ++mask)
    {
	__m64 vm = load8888 (*mask);
	__m64 vs = load8888 (*src);
	__m64 vd = load8888 (*dest);

	*dest = store8888 (pix_add (pix_multiply (vs, vm), vd));
    }
    _mm_empty ();
}
extern
FbComposeFunctions
composeFunctions
;
void
fbComposeSetupMMX
(
void
)
{
/* check if we have MMX support and initialize accordingly */
if
(
fbHaveMMX
())
{
composeFunctions
.
combineU
[
PictOpOver
]
=
mmxCombineOverU
;
composeFunctions
.
combineU
[
PictOpOverReverse
]
=
mmxCombineOverReverseU
;
composeFunctions
.
combineU
[
PictOpIn
]
=
mmxCombineInU
;
composeFunctions
.
combineU
[
PictOpInReverse
]
=
mmxCombineInReverseU
;
composeFunctions
.
combineU
[
PictOpOut
]
=
mmxCombineOutU
;
composeFunctions
.
combineU
[
PictOpOutReverse
]
=
mmxCombineOutReverseU
;
composeFunctions
.
combineU
[
PictOpAtop
]
=
mmxCombineAtopU
;
composeFunctions
.
combineU
[
PictOpAtopReverse
]
=
mmxCombineAtopReverseU
;
composeFunctions
.
combineU
[
PictOpXor
]
=
mmxCombineXorU
;
composeFunctions
.
combineU
[
PictOpAdd
]
=
mmxCombineAddU
;
composeFunctions
.
combineU
[
PictOpSaturate
]
=
mmxCombineSaturateU
;
composeFunctions
.
combineC
[
PictOpSrc
]
=
mmxCombineSrcC
;
composeFunctions
.
combineC
[
PictOpOver
]
=
mmxCombineOverC
;
composeFunctions
.
combineC
[
PictOpOverReverse
]
=
mmxCombineOverReverseC
;
composeFunctions
.
combineC
[
PictOpIn
]
=
mmxCombineInC
;
composeFunctions
.
combineC
[
PictOpInReverse
]
=
mmxCombineInReverseC
;
composeFunctions
.
combineC
[
PictOpOut
]
=
mmxCombineOutC
;
composeFunctions
.
combineC
[
PictOpOutReverse
]
=
mmxCombineOutReverseC
;
composeFunctions
.
combineC
[
PictOpAtop
]
=
mmxCombineAtopC
;
composeFunctions
.
combineC
[
PictOpAtopReverse
]
=
mmxCombineAtopReverseC
;
composeFunctions
.
combineC
[
PictOpXor
]
=
mmxCombineXorC
;
composeFunctions
.
combineC
[
PictOpAdd
]
=
mmxCombineAddC
;
composeFunctions
.
combineMaskU
=
mmxCombineMaskU
;
}
}
/* ------------------ MMX code paths called from fbpict.c ----------------------- */
/*
 * Composite a solid source colour over a width x height rectangle of a
 * 32-bit destination.  Returns without drawing when the solid colour's
 * top byte (alpha) is zero.  op, pMask and the source/mask coordinates
 * are not used by this routine.
 */
void
fbCompositeSolid_nx8888mmx (CARD8	op,
			    PicturePtr	pSrc,
			    PicturePtr	pMask,
			    PicturePtr	pDst,
			    INT16	xSrc,
			    INT16	ySrc,
			    INT16	xMask,
			    INT16	yMask,
			    INT16	xDst,
			    INT16	yDst,
			    CARD16	width,
			    CARD16	height)
{
    CARD32	src;
    CARD32	*dstLine, *dst;
    CARD16	w;
    FbStride	dstStride;
    __m64	vsrc, vsrca;

    CHECKPOINT ();

    fbComposeGetSolid (pSrc, src, pDst->format);

    if (src >> 24 == 0)
	return;

    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);

    vsrc = load8888 (src);
    vsrca = expand_alpha (vsrc);

    while (height--)
    {
	dst = dstLine;
	dstLine += dstStride;
	w = width;

	CHECKPOINT ();

	/* single pixels until dst is 8-byte aligned */
	for (; w && ((unsigned long) dst & 7); w--, dst++)
	    *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));

	/* two pixels per 64-bit access */
	for (; w >= 2; dst += 2, w -= 2)
	{
	    __m64 pair = *(__m64 *) dst;
	    __m64 lo = over (vsrc, vsrca, expand8888 (pair, 0));
	    __m64 hi = over (vsrc, vsrca, expand8888 (pair, 1));

	    *(__m64 *) dst = pack8888 (lo, hi);
	}

	CHECKPOINT ();

	/* at most one trailing pixel */
	for (; w; w--, dst++)
	    *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
    }

    _mm_empty ();
}
/*
 * Composite a solid source colour over a width x height rectangle of a
 * 16-bit (565) destination.  Returns without drawing when the solid
 * colour's top byte (alpha) is zero.  op, pMask and the source/mask
 * coordinates are not used by this routine.
 */
void
fbCompositeSolid_nx0565mmx (CARD8	op,
			    PicturePtr	pSrc,
			    PicturePtr	pMask,
			    PicturePtr	pDst,
			    INT16	xSrc,
			    INT16	ySrc,
			    INT16	xMask,
			    INT16	yMask,
			    INT16	xDst,
			    INT16	yDst,
			    CARD16	width,
			    CARD16	height)
{
    CARD32	src;
    CARD16	*dstLine, *dst;
    CARD16	w;
    FbStride	dstStride;
    __m64	vsrc, vsrca;

    CHECKPOINT ();

    fbComposeGetSolid (pSrc, src, pDst->format);

    if (src >> 24 == 0)
	return;

    fbComposeGetStart (pDst, xDst, yDst, CARD16, dstStride, dstLine, 1);

    vsrc = load8888 (src);
    vsrca = expand_alpha (vsrc);

    while (height--)
    {
	dst = dstLine;
	dstLine += dstStride;
	w = width;

	CHECKPOINT ();

	/* single pixels until dst is 8-byte aligned */
	for (; w && ((unsigned long) dst & 7); w--, dst++)
	{
	    ullong raw = *dst;
	    __m64 px = expand565 ((__m64) raw, 0);

	    px = pack565 (over (vsrc, vsrca, px), px, 0);
	    *dst = (ullong) px;
	}

	/* four pixels per 64-bit access, blended lane by lane in place */
	for (; w >= 4; dst += 4, w -= 4)
	{
	    __m64 quad = *(__m64 *) dst;

	    quad = pack565 (over (vsrc, vsrca, expand565 (quad, 0)), quad, 0);
	    quad = pack565 (over (vsrc, vsrca, expand565 (quad, 1)), quad, 1);
	    quad = pack565 (over (vsrc, vsrca, expand565 (quad, 2)), quad, 2);
	    quad = pack565 (over (vsrc, vsrca, expand565 (quad, 3)), quad, 3);

	    *(__m64 *) dst = quad;
	}

	CHECKPOINT ();

	/* remaining pixels one at a time */
	for (; w; w--, dst++)
	{
	    ullong raw = *dst;
	    __m64 px = expand565 ((__m64) raw, 0);

	    px = pack565 (over (vsrc, vsrca, px), px, 0);
	    *dst = (ullong) px;
	}
    }

    _mm_empty ();
}
/*
 * Composite a solid source colour through a 32-bit per-channel mask
 * onto a 32-bit destination, width x height pixels.  Pixels whose mask
 * word is zero are left untouched.  Returns without drawing when the
 * solid colour's top byte (alpha) is zero.
 */
void
fbCompositeSolidMask_nx8888x8888Cmmx (CARD8	op,
				      PicturePtr pSrc,
				      PicturePtr pMask,
				      PicturePtr pDst,
				      INT16	xSrc,
				      INT16	ySrc,
				      INT16	xMask,
				      INT16	yMask,
				      INT16	xDst,
				      INT16	yDst,
				      CARD16	width,
				      CARD16	height)
{
    CARD32	src, srca;
    CARD32	*dstLine;
    CARD32	*maskLine;
    FbStride	dstStride, maskStride;
    __m64	vsrc, vsrca;

    CHECKPOINT ();

    fbComposeGetSolid (pSrc, src, pDst->format);

    srca = src >> 24;
    if (srca == 0)
	return;

    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
    fbComposeGetStart (pMask, xMask, yMask, CARD32, maskStride, maskLine, 1);

    vsrc = load8888 (src);
    vsrca = expand_alpha (vsrc);

    while (height--)
    {
	int	remaining = width;
	CARD32	*mp = (CARD32 *) maskLine;
	CARD32	*dp = (CARD32 *) dstLine;

	/* single pixels until the destination is 8-byte aligned */
	for (; remaining && ((unsigned long) dp & 7); remaining--, mp++, dp++)
	{
	    CARD32 m = *(CARD32 *) mp;

	    if (m)
	    {
		__m64 vdest = load8888 (*dp);

		*dp = store8888 (in_over (vsrc, vsrca, load8888 (m), vdest));
	    }
	}

	/* two pixels per 64-bit destination access */
	for (; remaining >= 2; mp += 2, dp += 2, remaining -= 2)
	{
	    CARD32 m0 = *mp;
	    CARD32 m1 = *(mp + 1);

	    if (m0 | m1)
	    {
		__m64 pair = *(__m64 *) dp;
		__m64 lo = in_over (vsrc, vsrca, load8888 (m0),
				    expand8888 (pair, 0));
		__m64 hi = in_over (vsrc, vsrca, load8888 (m1),
				    expand8888 (pair, 1));

		*(__m64 *) dp = pack8888 (lo, hi);
	    }
	}

	/* at most one trailing pixel */
	for (; remaining; remaining--, mp++, dp++)
	{
	    CARD32 m = *(CARD32 *) mp;

	    if (m)
	    {
		__m64 vdest = load8888 (*dp);

		*dp = store8888 (in_over (vsrc, vsrca, load8888 (m), vdest));
	    }
	}

	dstLine += dstStride;
	maskLine += maskStride;
    }

    _mm_empty ();
}
void
fbCompositeSrc_8888x8x8888mmx
(
CARD8
op
,
PicturePtr
pSrc
,
PicturePtr
pMask
,
PicturePtr
pDst
,
INT16
xSrc
,
INT16
ySrc
,
INT16
xMask
,
INT16
yMask
,
INT16
xDst
,
INT16
yDst
,
CARD16
width
,
CARD16
height
)
{
CARD32
*
dstLine
,
*
dst
;
CARD32
*
srcLine
,
*
src
;
CARD8
*
maskLine
;
CARD32
mask
;
__m64
vmask
;
FbStride
dstStride
,
srcStride
,
maskStride
;
CARD16
w
;
__m64
srca
;
CHECKPOINT
();
fbComposeGetStart
(
pDst
,
xDst
,
yDst
,
CARD32
,
dstStride
,
dstLine
,
1
);
fbComposeGetStart
(
pSrc
,
xSrc
,
ySrc
,
CARD32
,
srcStride
,
srcLine
,
1
);
fbComposeGetStart
(
pMask
,
xMask
,
yMask
,
CARD8
,
maskStride
,
maskLine
,
1
);
mask
=
*
maskLine
<<
24
|
*
maskLine
<<
16
|
*
maskLine
<<
8
|
*
maskLine
;
vmask
=
load8888
(
mask
);
srca
=
MC
(
4
x00ff
);
while
(
height
--
)
{
dst
=
dstLine
;
dstLine
+=
dstStride
;
src
=
srcLine
;
srcLine
+=
srcStride
;
w
=
width
;
while
(
w
&&
(
unsigned
long
)
dst
&
7
)
{
__m64
s
=
load8888
(
*
src
);
__m64
d
=
load8888
(
*
dst
);
*
dst
=
store8888
(
in_over
(
s
,
srca
,
vmask
,
d
));
w
--
;
dst
++
;
src
++
;
}
while
(
w
>=
16
)
{
__m64
vd0
=
*
(
__m64
*
)(
dst
+
0
);
__m64
vd1
=
*
(
__m64
*
)(
dst
+
2
);
__m64
vd2
=
*
(
__m64
*
)(
dst
+
4
);
__m64
vd3
=
*
(
__m64
*
)(
dst
+
6
);
__m64
vd4
=
*
(
__m64
*
)(
dst
+
8
);
__m64
vd5
=
*
(
__m64
*
)(
dst
+
10
);
__m64
vd6
=
*
(
__m64
*
)(
dst
+
12
);
__m64
vd7
=
*
(
__m64
*
)(
dst
+
14
);
__m64
vs0
=
*
(
__m64
*
)(
src
+
0
);
__m64
vs1
=
*
(
__m64
*
)(
src
+
2
);
__m64
vs2
=
*
(
__m64
*
)(
src
+
4
);
__m64
vs3
=
*
(
__m64
*
)(
src
+
6
);
__m64
vs4
=
*
(
__m64
*
)(
src
+
8
);
__m64
vs5
=
*
(
__m64
*
)(
src
+
10
);
__m64
vs6
=
*
(
__m64
*
)(
src
+
12
);
__m64
vs7
=
*
(
__m64
*
)(
src
+
14
);
vd0
=
(
__m64
)
pack8888
(
in_over
(
expand8888
(
vs0
,
0
),
srca
,
vmask
,
expand8888
(
vd0
,
0
)),
in_over
(
expand8888
(
vs0
,
1
),
srca
,
vmask
,
expand8888
(
vd0
,
1
)));
vd1
=
(
__m64
)
pack8888
(
in_over
(
expand8888
(
vs1
,
0
),
srca
,
vmask
,
expand8888
(
vd1
,
0
)),
in_over
(
expand8888
(
vs1
,
1
),
srca
,
vmask
,
expand8888
(
vd1
,
1
)));
vd2
=
(
__m64
)
pack8888
(
in_over
(
expand8888
(
vs2
,
0
),
srca
,
vmask
,
expand8888
(
vd2
,
0
)),
in_over
(
expand8888
(
vs2
,
1
),
srca
,
vmask
,
expand8888
(
vd2
,
1
)));
vd3
=
(
__m64
)
pack8888
(
in_over
(
expand8888
(
vs3
,
0
),
srca
,
vmask
,
expand8888
(
vd3
,
0
)),
in_over
(
expand8888
(
vs3
,
1
),
srca
,
vmask
,
expand8888
(
vd3
,
1
)));
vd4
=
(
__m64
)
pack8888
(
in_over
(
expand8888
(
vs4
,
0
),
srca
,
vmask
,
expand8888
(
vd4
,
0
)),
in_over
(
expand8888
(
vs4
,
1
),
srca
,
vmask
,
expand8888
(
vd4
,
1
)));
vd5
=
(
__m64
)
pack8888
(
in_over
(
expand8888
(
vs5
,
0
),
srca
,
vmask
,
expand8888
(
vd5
,
0
)),
in_over
(
expand8888
(
vs5
,
1
),
srca
,
vmask
,
expand8888
(
vd5
,
1
)));
vd6
=
(
__m64
)
pack8888
(
in_over
(
expand8888
(
vs6
,
0
),
srca
,
vmask
,
expand8888
(
vd6
,
0
)),
in_over
(
expand8888
(
vs6
,
1
),
srca
,
vmask
,
expand8888
(
vd6
,
1
)));
vd7
=
(
__m64
)
pack8888
(
in_over
(
expand8888
(
vs7
,
0
),
srca
,
vmask
,
expand8888
(
vd7
,
0
)),
in_over
(
expand8888
(
vs7
,
1
),
srca
,
vmask
,
expand8888
(
vd7
,
1
)));
*
(
__m64
*
)(
dst
+
0
)
=
vd0
;
*
(
__m64
*
)(
dst
+
2
)
=
vd1
;
*
(
__m64
*
)(
dst
+
4
)
=
vd2
;
*
(
__m64
*
)(
dst
+
6
)
=
vd3
;
*
(
__m64
*
)(
dst
+
8
)
=
vd4
;
*
(
__m64
*
)(
dst
+
10
)
=
vd5
;
*
(
__m64
*
)(
dst
+
12
)
=
vd6
;
*
(
__m64
*
)(
dst
+
14
)
=
vd7
;
w
-=
16
;
dst
+=
16
;
src
+=
16
;
}
while
(
w
)
{
__m64
s
=
load8888
(
*
src
);
__m64
d
=
load8888
(
*
dst
);
*
dst
=
store8888
(
in_over
(
s
,
srca
,
vmask
,
d
));
w
--
;
dst
++
;
src
++
;
}
}
_mm_empty
();
}
/*
 * Composite a 32-bit source with per-pixel alpha over a 32-bit
 * destination, width x height pixels:
 *   dst = over (src, alpha (src), dst).
 * op, pMask and the mask coordinates are not used by this routine.
 *
 * Cleanup: the local `srca` (assigned MC(4x00ff) but never read) has
 * been removed; every blend uses expand_alpha () of the source pixel.
 */
void
fbCompositeSrc_8888x8888mmx (CARD8	op,
			     PicturePtr pSrc,
			     PicturePtr pMask,
			     PicturePtr pDst,
			     INT16	xSrc,
			     INT16	ySrc,
			     INT16	xMask,
			     INT16	yMask,
			     INT16	xDst,
			     INT16	yDst,
			     CARD16	width,
			     CARD16	height)
{
    CARD32	*dstLine, *dst;
    CARD32	*srcLine, *src;
    FbStride	dstStride, srcStride;
    CARD16	w;

    CHECKPOINT ();

    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstStride, dstLine, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcStride, srcLine, 1);

    while (height--)
    {
	dst = dstLine;
	dstLine += dstStride;
	src = srcLine;
	srcLine += srcStride;
	w = width;

	/* single pixels until dst is 8-byte aligned */
	while (w && (unsigned long) dst & 7)
	{
	    __m64 s = load8888 (*src);
	    __m64 d = load8888 (*dst);

	    *dst = store8888 (over (s, expand_alpha (s), d));

	    w--;
	    dst++;
	    src++;
	}

	/* two pixels per 64-bit access */
	while (w >= 2)
	{
	    __m64 vd = *(__m64 *) (dst + 0);
	    __m64 vs = *(__m64 *) (src + 0);
	    __m64 vs0 = expand8888 (vs, 0);
	    __m64 vs1 = expand8888 (vs, 1);

	    *(__m64 *) dst = pack8888 (
		over (vs0, expand_alpha (vs0), expand8888 (vd, 0)),
		over (vs1, expand_alpha (vs1), expand8888 (vd, 1)));

	    w -= 2;
	    dst += 2;
	    src += 2;
	}

	/* at most one trailing pixel */
	while (w)
	{
	    __m64 s = load8888 (*src);
	    __m64 d = load8888 (*dst);

	    *dst = store8888 (over (s, expand_alpha (s), d));

	    w--;
	    dst++;
	    src++;
	}
    }

    _mm_empty ();
}
/*
 * MMX compositing path: solid source colour, 8-bit mask, 32 bpp destination.
 *
 * The solid colour is fetched once with fbComposeGetSolid(); if its top byte
 * (src >> 24) is zero the routine returns without touching the destination.
 * For every pixel with a non-zero mask byte the destination is replaced by
 * in_over(colour, colour alpha, expanded mask byte, destination).
 *
 * In the two-pixel loop, when the colour's top byte is 0xff and both mask
 * bytes are 0xff, the colour is stored directly without blending.
 *
 * op, xSrc and ySrc are not read by this routine.
 */
void
fbCompositeSolidMask_nx8x8888mmx (CARD8      op,
                                  PicturePtr pSrc,
                                  PicturePtr pMask,
                                  PicturePtr pDst,
                                  INT16      xSrc,
                                  INT16      ySrc,
                                  INT16      xMask,
                                  INT16      yMask,
                                  INT16      xDst,
                                  INT16      yDst,
                                  CARD16     width,
                                  CARD16     height)
{
    CARD32    solid, solidAlpha;
    CARD32   *dstRow, *out;
    CARD8    *maskRow, *cov;
    FbStride  dstPitch, maskPitch;
    CARD16    left;
    __m64     vsolid, vsolidAlpha;
    ullong    solidPair;

    CHECKPOINT();

    fbComposeGetSolid(pSrc, solid, pDst->format);

    solidAlpha = solid >> 24;
    if (solidAlpha == 0)
        return;

    /* Two copies of the colour side by side, for the opaque fast path. */
    solidPair = (unsigned long long)solid << 32 | solid;

    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstPitch, dstRow, 1);
    fbComposeGetStart (pMask, xMask, yMask, CARD8, maskPitch, maskRow, 1);

    vsolid = load8888 (solid);
    vsolidAlpha = expand_alpha (vsolid);

    for (; height != 0; height--)
    {
        out = dstRow;
        cov = maskRow;
        dstRow += dstPitch;
        maskRow += maskPitch;
        left = width;

        CHECKPOINT();

        /* Leading pixels, one at a time, until out is 8-byte aligned. */
        for (; left && ((unsigned long)out & 7); left--, cov++, out++)
        {
            ullong m = *cov;

            if (m)
            {
                __m64 blended = in_over (vsolid, vsolidAlpha,
                                         expand_alpha_rev ((__m64)m),
                                         load8888 (*out));
                *out = store8888 (blended);
            }
        }

        CHECKPOINT();

        /* Aligned middle part, two pixels per iteration. */
        for (; left >= 2; left -= 2, cov += 2, out += 2)
        {
            ullong m0 = cov[0];
            ullong m1 = cov[1];

            if (solidAlpha == 0xff && (m0 & m1) == 0xff)
            {
                *(unsigned long long *)out = solidPair;
            }
            else if (m0 | m1)
            {
                __m64 pair = *(__m64 *)out;
                __m64 lo, hi;

                lo = in_over (vsolid, vsolidAlpha,
                              expand_alpha_rev ((__m64)m0),
                              expand8888 (pair, 0));
                hi = in_over (vsolid, vsolidAlpha,
                              expand_alpha_rev ((__m64)m1),
                              expand8888 (pair, 1));

                *(__m64 *)out = pack8888 (lo, hi);
            }
        }

        CHECKPOINT();

        /* Trailing pixel, if any. */
        for (; left; left--, cov++, out++)
        {
            ullong m = *cov;

            if (m)
            {
                __m64 blended = load8888 (*out);

                blended = in_over (vsolid, vsolidAlpha,
                                   expand_alpha_rev ((__m64)m), blended);
                *out = store8888 (blended);
            }
        }
    }

    /* Clear the MMX state (EMMS) before returning. */
    _mm_empty ();
}
/*
 * MMX compositing path: solid source colour, 8-bit mask, 16 bpp destination
 * handled through expand565()/pack565().
 *
 * Returns immediately when the top byte of the solid colour (src >> 24) is
 * zero.  Otherwise every pixel whose mask byte is non-zero is replaced by
 * in_over(colour, colour alpha, expanded mask byte, expanded destination).
 *
 * In the four-pixel loop, when the colour's top byte is 0xff and all four
 * mask bytes are 0xff, four pre-packed copies of the colour are stored
 * directly without blending.
 *
 * op, xSrc and ySrc are not read by this routine.
 */
void
fbCompositeSolidMask_nx8x0565mmx (CARD8      op,
                                  PicturePtr pSrc,
                                  PicturePtr pMask,
                                  PicturePtr pDst,
                                  INT16      xSrc,
                                  INT16      ySrc,
                                  INT16      xMask,
                                  INT16      yMask,
                                  INT16      xDst,
                                  INT16      yDst,
                                  CARD16     width,
                                  CARD16     height)
{
    CARD32              solid, solidAlpha;
    CARD16             *dstRow, *out;
    CARD8              *maskRow, *cov;
    FbStride            dstPitch, maskPitch;
    CARD16              left;
    __m64               vsolid, vsolidAlpha;
    unsigned long long  solidQuad, solid565;

    CHECKPOINT();

    fbComposeGetSolid(pSrc, solid, pDst->format);

    solidAlpha = solid >> 24;
    if (solidAlpha == 0)
        return;

    fbComposeGetStart (pDst, xDst, yDst, CARD16, dstPitch, dstRow, 1);
    fbComposeGetStart (pMask, xMask, yMask, CARD8, maskPitch, maskRow, 1);

    vsolid = load8888 (solid);
    vsolidAlpha = expand_alpha (vsolid);

    /* The colour packed into slot 0, then replicated into all four
     * 16-bit slots of a 64-bit word for the opaque fast path. */
    solid565 = (ullong)pack565 (vsolid, _mm_setzero_si64 (), 0);
    solidQuad = (ullong)solid565 << 48 | (ullong)solid565 << 32 |
                (ullong)solid565 << 16 | (ullong)solid565;

    for (; height != 0; height--)
    {
        out = dstRow;
        cov = maskRow;
        dstRow += dstPitch;
        maskRow += maskPitch;
        left = width;

        CHECKPOINT();

        /* Leading pixels, one at a time, until out is 8-byte aligned. */
        for (; left && ((unsigned long)out & 7); left--, cov++, out++)
        {
            ullong m = *cov;

            if (m)
            {
                ullong raw = *out;
                __m64 vraw = (__m64)raw;
                __m64 blended = in_over (vsolid, vsolidAlpha,
                                         expand_alpha_rev ((__m64)m),
                                         expand565 (vraw, 0));

                *out = (ullong)pack565 (blended, _mm_setzero_si64 (), 0);
            }
        }

        CHECKPOINT();

        /* Aligned middle part, four 16-bit pixels per 64-bit word. */
        for (; left >= 4; left -= 4, cov += 4, out += 4)
        {
            ullong m0 = cov[0];
            ullong m1 = cov[1];
            ullong m2 = cov[2];
            ullong m3 = cov[3];

            if (solidAlpha == 0xff && (m0 & m1 & m2 & m3) == 0xff)
            {
                *(unsigned long long *)out = solidQuad;
            }
            else if (m0 | m1 | m2 | m3)
            {
                /* Each step blends one slot and packs it back into acc. */
                __m64 acc = *(__m64 *)out;

                acc = pack565 (in_over (vsolid, vsolidAlpha,
                                        expand_alpha_rev ((__m64)m0),
                                        expand565 (acc, 0)),
                               acc, 0);
                acc = pack565 (in_over (vsolid, vsolidAlpha,
                                        expand_alpha_rev ((__m64)m1),
                                        expand565 (acc, 1)),
                               acc, 1);
                acc = pack565 (in_over (vsolid, vsolidAlpha,
                                        expand_alpha_rev ((__m64)m2),
                                        expand565 (acc, 2)),
                               acc, 2);
                acc = pack565 (in_over (vsolid, vsolidAlpha,
                                        expand_alpha_rev ((__m64)m3),
                                        expand565 (acc, 3)),
                               acc, 3);

                *(__m64 *)out = acc;
            }
        }

        CHECKPOINT();

        /* Trailing pixels (fewer than four). */
        for (; left; left--, cov++, out++)
        {
            ullong m = *cov;

            if (m)
            {
                ullong raw = *out;
                __m64 vraw = (__m64)raw;
                __m64 blended = in_over (vsolid, vsolidAlpha,
                                         expand_alpha_rev ((__m64)m),
                                         expand565 (vraw, 0));

                *out = (ullong)pack565 (blended, _mm_setzero_si64 (), 0);
            }
        }
    }

    /* Clear the MMX state (EMMS) before returning. */
    _mm_empty ();
}
/*
 * MMX compositing path: 32 bpp "8888RevNP" source onto a 16 bpp destination
 * handled through expand565()/pack565().
 *
 * Pixels are combined with over_rev_non_pre(source, destination).  In the
 * four-pixel loop two shortcuts apply, keyed on the top byte (>> 24) of the
 * four source pixels:
 *   - all four are 0xFF: the destination is overwritten with
 *     invert_colors(source), no blending;
 *   - all four are zero: the destination word is left untouched.
 *
 * The routine asserts that pSrc and pMask share the same drawable, so pMask
 * must be non-NULL when assertions are enabled.  op, xMask and yMask are not
 * read.
 */
void
fbCompositeSrc_8888RevNPx0565mmx (CARD8      op,
                                  PicturePtr pSrc,
                                  PicturePtr pMask,
                                  PicturePtr pDst,
                                  INT16      xSrc,
                                  INT16      ySrc,
                                  INT16      xMask,
                                  INT16      yMask,
                                  INT16      xDst,
                                  INT16      yDst,
                                  CARD16     width,
                                  CARD16     height)
{
    CARD16   *dstRow, *out;
    CARD32   *srcRow, *in;
    FbStride  dstPitch, srcPitch;
    CARD16    left;

    CHECKPOINT();

    fbComposeGetStart (pDst, xDst, yDst, CARD16, dstPitch, dstRow, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcPitch, srcRow, 1);

    assert (pSrc->pDrawable == pMask->pDrawable);

    for (; height != 0; height--)
    {
        out = dstRow;
        in = srcRow;
        dstRow += dstPitch;
        srcRow += srcPitch;
        left = width;

        CHECKPOINT();

        /* Leading pixels, one at a time, until out is 8-byte aligned. */
        for (; left && ((unsigned long)out & 7); left--, out++, in++)
        {
            __m64 vin = load8888 (*in);
            ullong raw = *out;
            __m64 vout = expand565 ((__m64)raw, 0);

            vout = pack565 (over_rev_non_pre (vin, vout), vout, 0);

            /* Only the low 16 bits survive the store into a CARD16. */
            *out = (ullong)vout;
        }

        CHECKPOINT();

        /* Aligned middle part, four destination pixels per 64-bit word. */
        for (; left >= 4; left -= 4, out += 4, in += 4)
        {
            CARD32 p0 = in[0];
            CARD32 p1 = in[1];
            CARD32 p2 = in[2];
            CARD32 p3 = in[3];
            unsigned char t0 = (p0 >> 24);
            unsigned char t1 = (p1 >> 24);
            unsigned char t2 = (p2 >> 24);
            unsigned char t3 = (p3 >> 24);

            if ((t0 & t1 & t2 & t3) == 0xFF)
            {
                __m64 acc;

                acc = pack565 (invert_colors (load8888 (p0)), _mm_setzero_si64 (), 0);
                acc = pack565 (invert_colors (load8888 (p1)), acc, 1);
                acc = pack565 (invert_colors (load8888 (p2)), acc, 2);
                acc = pack565 (invert_colors (load8888 (p3)), acc, 3);

                *(__m64 *)out = acc;
            }
            else if (t0 | t1 | t2 | t3)
            {
                __m64 acc = *(__m64 *)out;

                acc = pack565 (over_rev_non_pre (load8888 (p0), expand565 (acc, 0)), acc, 0);
                acc = pack565 (over_rev_non_pre (load8888 (p1), expand565 (acc, 1)), acc, 1);
                acc = pack565 (over_rev_non_pre (load8888 (p2), expand565 (acc, 2)), acc, 2);
                acc = pack565 (over_rev_non_pre (load8888 (p3), expand565 (acc, 3)), acc, 3);

                *(__m64 *)out = acc;
            }
        }

        CHECKPOINT();

        /* Trailing pixels (fewer than four). */
        for (; left; left--, out++, in++)
        {
            __m64 vin = load8888 (*in);
            ullong raw = *out;
            __m64 vout = expand565 ((__m64)raw, 0);

            vout = pack565 (over_rev_non_pre (vin, vout), vout, 0);

            *out = (ullong)vout;
        }
    }

    /* Clear the MMX state (EMMS) before returning. */
    _mm_empty ();
}
/* "8888RevNP" is GdkPixbuf's format: ABGR, non premultiplied */
/*
 * MMX compositing path: 32 bpp "8888RevNP" source onto a 32 bpp destination.
 *
 * Pixels are combined with over_rev_non_pre(source, destination).  In the
 * two-pixel loop two shortcuts apply, keyed on the top byte (>> 24) of both
 * source pixels:
 *   - both are 0xFF: the destination is overwritten with
 *     invert_colors(source), no blending;
 *   - both are zero: the destination pair is left untouched.
 *
 * The routine asserts that pSrc and pMask share the same drawable, so pMask
 * must be non-NULL when assertions are enabled.  op, xMask and yMask are not
 * read.
 */
void
fbCompositeSrc_8888RevNPx8888mmx (CARD8      op,
                                  PicturePtr pSrc,
                                  PicturePtr pMask,
                                  PicturePtr pDst,
                                  INT16      xSrc,
                                  INT16      ySrc,
                                  INT16      xMask,
                                  INT16      yMask,
                                  INT16      xDst,
                                  INT16      yDst,
                                  CARD16     width,
                                  CARD16     height)
{
    CARD32   *dstRow, *out;
    CARD32   *srcRow, *in;
    FbStride  dstPitch, srcPitch;
    CARD16    left;

    CHECKPOINT();

    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstPitch, dstRow, 1);
    fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcPitch, srcRow, 1);

    assert (pSrc->pDrawable == pMask->pDrawable);

    for (; height != 0; height--)
    {
        out = dstRow;
        in = srcRow;
        dstRow += dstPitch;
        srcRow += srcPitch;
        left = width;

        /* Leading pixels, one at a time, until out is 8-byte aligned. */
        for (; left && ((unsigned long)out & 7); left--, out++, in++)
        {
            __m64 vin = load8888 (*in);
            __m64 vout = load8888 (*out);

            *out = store8888 (over_rev_non_pre (vin, vout));
        }

        /* Aligned middle part, two pixels per 64-bit store. */
        for (; left >= 2; left -= 2, out += 2, in += 2)
        {
            ullong p0 = in[0];
            ullong p1 = in[1];
            unsigned char t0 = (p0 >> 24);
            unsigned char t1 = (p1 >> 24);
            __m64 r0, r1;

            if ((t0 & t1) == 0xFF)
            {
                r0 = invert_colors (load8888 (p0));
                r1 = invert_colors (load8888 (p1));

                *(__m64 *)out = pack8888 (r0, r1);
            }
            else if (t0 | t1)
            {
                __m64 pair = *(__m64 *)out;

                r0 = over_rev_non_pre (load8888 (p0), expand8888 (pair, 0));
                r1 = over_rev_non_pre (load8888 (p1), expand8888 (pair, 1));

                *(__m64 *)out = pack8888 (r0, r1);
            }
        }

        /* Trailing pixel, if any. */
        for (; left; left--, out++, in++)
        {
            __m64 vin = load8888 (*in);
            __m64 vout = load8888 (*out);

            *out = store8888 (over_rev_non_pre (vin, vout));
        }
    }

    /* Clear the MMX state (EMMS) before returning. */
    _mm_empty ();
}
/*
 * MMX compositing path: solid source colour, 32-bit mask, 16 bpp destination
 * handled through expand565()/pack565().
 *
 * Returns immediately when the top byte of the solid colour (src >> 24) is
 * zero.  Each mask word is passed whole to in_over() via load8888(); pixels
 * whose mask word is zero are skipped, and in the four-pixel loop the
 * destination word is only rewritten when at least one of the four mask
 * words is non-zero.
 *
 * op, xSrc and ySrc are not read by this routine.
 */
void
fbCompositeSolidMask_nx8888x0565Cmmx (CARD8      op,
                                      PicturePtr pSrc,
                                      PicturePtr pMask,
                                      PicturePtr pDst,
                                      INT16      xSrc,
                                      INT16      ySrc,
                                      INT16      xMask,
                                      INT16      yMask,
                                      INT16      xDst,
                                      INT16      yDst,
                                      CARD16     width,
                                      CARD16     height)
{
    CARD32    solid, solidAlpha;
    CARD16   *dstRow;
    CARD32   *maskRow;
    FbStride  dstPitch, maskPitch;
    __m64     vsolid, vsolidAlpha;

    CHECKPOINT();

    fbComposeGetSolid(pSrc, solid, pDst->format);

    solidAlpha = solid >> 24;
    if (solidAlpha == 0)
        return;

    fbComposeGetStart (pDst, xDst, yDst, CARD16, dstPitch, dstRow, 1);
    fbComposeGetStart (pMask, xMask, yMask, CARD32, maskPitch, maskRow, 1);

    vsolid = load8888 (solid);
    vsolidAlpha = expand_alpha (vsolid);

    for (; height != 0; height--, maskRow += maskPitch, dstRow += dstPitch)
    {
        int     left = width;
        CARD32 *cov = maskRow;
        CARD16 *out = dstRow;

        /* Leading pixels, one at a time, until out is 8-byte aligned. */
        for (; left && ((unsigned long)out & 7); left--, cov++, out++)
        {
            CARD32 m = *cov;

            if (m)
            {
                ullong raw = *out;
                __m64 vout = expand565 ((__m64)raw, 0);

                vout = pack565 (in_over (vsolid, vsolidAlpha, load8888 (m), vout),
                                vout, 0);
                *out = (ullong)vout;
            }
        }

        /* Aligned middle part, four destination pixels per 64-bit word. */
        for (; left >= 4; left -= 4, cov += 4, out += 4)
        {
            CARD32 m0 = cov[0];
            CARD32 m1 = cov[1];
            CARD32 m2 = cov[2];
            CARD32 m3 = cov[3];

            if ((m0 | m1 | m2 | m3))
            {
                __m64 acc = *(__m64 *)out;

                acc = pack565 (in_over (vsolid, vsolidAlpha, load8888 (m0),
                                        expand565 (acc, 0)),
                               acc, 0);
                acc = pack565 (in_over (vsolid, vsolidAlpha, load8888 (m1),
                                        expand565 (acc, 1)),
                               acc, 1);
                acc = pack565 (in_over (vsolid, vsolidAlpha, load8888 (m2),
                                        expand565 (acc, 2)),
                               acc, 2);
                acc = pack565 (in_over (vsolid, vsolidAlpha, load8888 (m3),
                                        expand565 (acc, 3)),
                               acc, 3);

                *(__m64 *)out = acc;
            }
        }

        /* Trailing pixels (fewer than four). */
        for (; left; left--, cov++, out++)
        {
            CARD32 m = *cov;

            if (m)
            {
                ullong raw = *out;
                __m64 vout = expand565 ((__m64)raw, 0);

                vout = pack565 (in_over (vsolid, vsolidAlpha, load8888 (m), vout),
                                vout, 0);
                *out = (ullong)vout;
            }
        }
    }

    /* Clear the MMX state (EMMS) before returning. */
    _mm_empty ();
}
/*
 * MMX compositing path: saturating per-byte add of an 8 bpp source into an
 * 8 bpp destination.
 *
 * Unaligned leading bytes and trailing bytes are added one at a time; the
 * 8-byte-aligned middle part is added eight bytes per iteration with
 * _mm_adds_pu8 (unsigned saturating byte add).
 *
 * op, pMask, xMask and yMask are not read by this routine.
 */
void
fbCompositeSrcAdd_8000x8000mmx (CARD8      op,
                                PicturePtr pSrc,
                                PicturePtr pMask,
                                PicturePtr pDst,
                                INT16      xSrc,
                                INT16      ySrc,
                                INT16      xMask,
                                INT16      yMask,
                                INT16      xDst,
                                INT16      yDst,
                                CARD16     width,
                                CARD16     height)
{
    CARD8    *dstRow, *out;
    CARD8    *srcRow, *in;
    FbStride  dstPitch, srcPitch;
    CARD16    left;
    CARD16    sum;

    CHECKPOINT();

    fbComposeGetStart (pSrc, xSrc, ySrc, CARD8, srcPitch, srcRow, 1);
    fbComposeGetStart (pDst, xDst, yDst, CARD8, dstPitch, dstRow, 1);

    for (; height != 0; height--)
    {
        out = dstRow;
        in = srcRow;
        dstRow += dstPitch;
        srcRow += srcPitch;
        left = width;

        /* Leading bytes until out is 8-byte aligned. */
        for (; left && ((unsigned long)out & 7); left--, out++, in++)
        {
            sum = *out + *in;
            /* If the sum carried into bit 8, (0 - 1) sets every bit and the
             * stored byte becomes 0xff; otherwise the sum passes through. */
            *out = (CARD8)(sum | (0 - (sum >> 8)));
        }

        /* Aligned middle part, eight bytes at a time. */
        for (; left >= 8; left -= 8, out += 8, in += 8)
        {
            *(__m64 *)out = _mm_adds_pu8 (*(__m64 *)in, *(__m64 *)out);
        }

        /* Trailing bytes (fewer than eight). */
        for (; left; left--, out++, in++)
        {
            sum = *out + *in;
            *out = (CARD8)(sum | (0 - (sum >> 8)));
        }
    }

    /* Clear the MMX state (EMMS) before returning. */
    _mm_empty ();
}
/*
 * MMX compositing path: saturating per-byte add of a 32 bpp source into a
 * 32 bpp destination, using _mm_adds_pu8 throughout.
 *
 * Leading pixels are added one at a time until the destination pointer is
 * 8-byte aligned, then two pixels are added per 64-bit access, then a single
 * trailing pixel is handled if one remains.
 *
 * op, pMask, xMask and yMask are not read by this routine.
 */
void
fbCompositeSrcAdd_8888x8888mmx (CARD8      op,
                                PicturePtr pSrc,
                                PicturePtr pMask,
                                PicturePtr pDst,
                                INT16      xSrc,
                                INT16      ySrc,
                                INT16      xMask,
                                INT16      yMask,
                                INT16      xDst,
                                INT16      yDst,
                                CARD16     width,
                                CARD16     height)
{
    CARD32   *dstRow, *out;
    CARD32   *srcRow, *in;
    FbStride  dstPitch, srcPitch;
    CARD16    left;

    CHECKPOINT();

    fbComposeGetStart (pSrc, xSrc, ySrc, CARD32, srcPitch, srcRow, 1);
    fbComposeGetStart (pDst, xDst, yDst, CARD32, dstPitch, dstRow, 1);

    for (; height != 0; height--)
    {
        out = dstRow;
        in = srcRow;
        dstRow += dstPitch;
        srcRow += srcPitch;
        left = width;

        /* Leading pixels until out is 8-byte aligned. */
        for (; left && ((unsigned long)out & 7); left--, out++, in++)
        {
            __m64 a = _mm_cvtsi32_si64 (*in);
            __m64 b = _mm_cvtsi32_si64 (*out);

            *out = _mm_cvtsi64_si32 (_mm_adds_pu8 (a, b));
        }

        /* Aligned middle part, two pixels per 64-bit access. */
        for (; left >= 2; left -= 2, out += 2, in += 2)
        {
            *(ullong *)out = (ullong) _mm_adds_pu8 (*(__m64 *)in, *(__m64 *)out);
        }

        /* At most one pixel can remain here; the row pointers are reloaded
         * at the top of the next iteration, so nothing needs advancing. */
        if (left)
        {
            __m64 a = _mm_cvtsi32_si64 (*in);
            __m64 b = _mm_cvtsi32_si64 (*out);

            *out = _mm_cvtsi64_si32 (_mm_adds_pu8 (a, b));
        }
    }

    /* Clear the MMX state (EMMS) before returning. */
    _mm_empty ();
}
/*
 * Fill a width x height rectangle at (x, y) of pDraw with the value xor,
 * using 64-bit MMX stores for the bulk of each row.
 *
 * Returns FALSE without writing anything when the drawable is neither 16 nor
 * 32 bits per pixel, or when it is 16 bpp and (xor >> 16) differs from the
 * low 16 bits of xor.  Returns TRUE once the rectangle has been written.
 *
 * Each row is written as: 16-bit stores until 4-byte aligned, 32-bit stores
 * until 8-byte aligned, 64-byte bursts of eight 64-bit stores, then 32-bit
 * stores and finally one optional 16-bit store for the remainder.
 */
Bool
fbSolidFillmmx (DrawablePtr pDraw,
                int         x,
                int         y,
                int         width,
                int         height,
                FbBits      xor)
{
    FbStride  stride;
    int       bpp;
    ullong    fill;
    __m64     vfill;
    CARD32    byte_width;
    CARD8    *byte_line;
    FbBits   *bits;
    int       xoff, yoff;

    CHECKPOINT();

    fbGetDrawable(pDraw, bits, stride, bpp, xoff, yoff);

    /* Convert the stride to pixel units to locate the first pixel, then to
     * bytes for stepping from row to row. */
    switch (bpp)
    {
    case 16:
        if (xor >> 16 != (xor & 0xffff))
            return FALSE;

        stride = stride * sizeof (FbBits) / 2;
        byte_line = (CARD8 *)(((CARD16 *)bits) + stride * (y + yoff) + (x + xoff));
        byte_width = 2 * width;
        stride *= 2;
        break;

    case 32:
        stride = stride * sizeof (FbBits) / 4;
        byte_line = (CARD8 *)(((CARD32 *)bits) + stride * (y + yoff) + (x + xoff));
        byte_width = 4 * width;
        stride *= 4;
        break;

    default:
        return FALSE;
    }

    /* xor replicated into both 32-bit halves of a 64-bit word. */
    fill = ((ullong)xor << 32) | xor;
    vfill = (__m64)fill;

    for (; height != 0; height--)
    {
        CARD8 *p = byte_line;
        int    left = byte_width;

        byte_line += stride;

        /* Head: bring p up to 4-byte, then 8-byte alignment. */
        for (; left >= 2 && ((unsigned long)p & 3); left -= 2, p += 2)
            *(CARD16 *)p = xor;

        for (; left >= 4 && ((unsigned long)p & 7); left -= 4, p += 4)
            *(CARD32 *)p = xor;

        /* Bulk: 64 bytes per iteration. */
        for (; left >= 64; left -= 64, p += 64)
        {
            __m64 *q = (__m64 *)p;

            q[0] = vfill;
            q[1] = vfill;
            q[2] = vfill;
            q[3] = vfill;
            q[4] = vfill;
            q[5] = vfill;
            q[6] = vfill;
            q[7] = vfill;
        }

        /* Tail: remaining 32-bit words, then one 16-bit value if needed. */
        for (; left >= 4; left -= 4, p += 4)
            *(CARD32 *)p = xor;

        if (left >= 2)
        {
            *(CARD16 *)p = xor;
            left -= 2;
            p += 2;
        }
    }

    /* Clear the MMX state (EMMS) before returning. */
    _mm_empty ();
    return TRUE;
}
/*
 * Copy a width x height rectangle from (src_x, src_y) in pSrc to
 * (dst_x, dst_y) in pDst, using 64-bit MMX moves for the bulk of each row.
 *
 * Returns FALSE without copying when the two drawables have different bits
 * per pixel, or when that depth is neither 16 nor 32.  Returns TRUE once the
 * rectangle has been copied.
 *
 * Each row is copied as: 16-bit moves until the destination is 4-byte
 * aligned, 32-bit moves until it is 8-byte aligned, 64-byte bursts of eight
 * 64-bit moves, then 32-bit moves and one optional 16-bit move.
 *
 * NOTE(review): rows and bytes are always walked in increasing address
 * order, so overlapping source and destination areas do not appear to be
 * handled here -- confirm against the callers.
 */
Bool
fbCopyAreammx (DrawablePtr pSrc,
               DrawablePtr pDst,
               int         src_x,
               int         src_y,
               int         dst_x,
               int         dst_y,
               int         width,
               int         height)
{
    FbBits   *src_bits;
    FbStride  src_stride;
    int       src_bpp;
    int       src_xoff;
    int       src_yoff;

    FbBits   *dst_bits;
    FbStride  dst_stride;
    int       dst_bpp;
    int       dst_xoff;
    int       dst_yoff;

    CARD8    *src_bytes;
    CARD8    *dst_bytes;
    int       byte_width;

    fbGetDrawable(pSrc, src_bits, src_stride, src_bpp, src_xoff, src_yoff);
    fbGetDrawable(pDst, dst_bits, dst_stride, dst_bpp, dst_xoff, dst_yoff);

    if (src_bpp != dst_bpp)
        return FALSE;

    /* Convert the strides to pixel units to locate the first pixels, then
     * to bytes for stepping from row to row. */
    switch (src_bpp)
    {
    case 16:
        src_stride = src_stride * sizeof (FbBits) / 2;
        dst_stride = dst_stride * sizeof (FbBits) / 2;
        src_bytes = (CARD8 *)(((CARD16 *)src_bits) + src_stride * (src_y + src_yoff) + (src_x + src_xoff));
        dst_bytes = (CARD8 *)(((CARD16 *)dst_bits) + dst_stride * (dst_y + dst_yoff) + (dst_x + dst_xoff));
        byte_width = 2 * width;
        src_stride *= 2;
        dst_stride *= 2;
        break;

    case 32:
        src_stride = src_stride * sizeof (FbBits) / 4;
        dst_stride = dst_stride * sizeof (FbBits) / 4;
        src_bytes = (CARD8 *)(((CARD32 *)src_bits) + src_stride * (src_y + src_yoff) + (src_x + src_xoff));
        dst_bytes = (CARD8 *)(((CARD32 *)dst_bits) + dst_stride * (dst_y + dst_yoff) + (dst_x + dst_xoff));
        byte_width = 4 * width;
        src_stride *= 4;
        dst_stride *= 4;
        break;

    default:
        return FALSE;
    }

    for (; height != 0; height--)
    {
        CARD8 *from = src_bytes;
        CARD8 *to = dst_bytes;
        int    left = byte_width;

        src_bytes += src_stride;
        dst_bytes += dst_stride;

        /* Head: bring the destination up to 4-byte, then 8-byte alignment. */
        for (; left >= 2 && ((unsigned long)to & 3); left -= 2, from += 2, to += 2)
            *(CARD16 *)to = *(CARD16 *)from;

        for (; left >= 4 && ((unsigned long)to & 7); left -= 4, from += 4, to += 4)
            *(CARD32 *)to = *(CARD32 *)from;

        /* Bulk: 64 bytes per iteration. */
        for (; left >= 64; left -= 64, from += 64, to += 64)
        {
            *(__m64 *)(to + 0)  = *(__m64 *)(from + 0);
            *(__m64 *)(to + 8)  = *(__m64 *)(from + 8);
            *(__m64 *)(to + 16) = *(__m64 *)(from + 16);
            *(__m64 *)(to + 24) = *(__m64 *)(from + 24);
            *(__m64 *)(to + 32) = *(__m64 *)(from + 32);
            *(__m64 *)(to + 40) = *(__m64 *)(from + 40);
            *(__m64 *)(to + 48) = *(__m64 *)(from + 48);
            *(__m64 *)(to + 56) = *(__m64 *)(from + 56);
        }

        /* Tail: remaining 32-bit words, then one 16-bit value if needed. */
        for (; left >= 4; left -= 4, from += 4, to += 4)
            *(CARD32 *)to = *(CARD32 *)from;

        if (left >= 2)
        {
            *(CARD16 *)to = *(CARD16 *)from;
            left -= 2;
            from += 2;
            to += 2;
        }
    }

    /* Clear the MMX state (EMMS) before returning. */
    _mm_empty ();
    return TRUE;
}
/*
 * Composite-function adapter around fbCopyAreammx(): copies the width x
 * height area at (xSrc, ySrc) of the source picture's drawable to
 * (xDst, yDst) of the destination picture's drawable.
 *
 * op, pMask, xMask and yMask are not read, and the Bool result of
 * fbCopyAreammx() is discarded.
 */
void
fbCompositeCopyAreammx (CARD8      op,
                        PicturePtr pSrc,
                        PicturePtr pMask,
                        PicturePtr pDst,
                        INT16      xSrc,
                        INT16      ySrc,
                        INT16      xMask,
                        INT16      yMask,
                        INT16      xDst,
                        INT16      yDst,
                        CARD16     width,
                        CARD16     height)
{
    (void) fbCopyAreammx (pSrc->pDrawable, pDst->pDrawable,
                          xSrc, ySrc,
                          xDst, yDst,
                          width, height);
}
#endif
/* RENDER */
#endif
/* USE_MMX */
nx-X11/programs/Xserver/fb/fbmmx.h
deleted
100644 → 0
View file @
d12b7754
/*
* Copyright © 2004 Red Hat, Inc.
* Copyright © 2005 Trolltech AS
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Red Hat not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. Red Hat makes no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Søren Sandmann (sandmann@redhat.com)
* Lars Knoll (lars@trolltech.com)
*
* Based on work by Owen Taylor
*/
#ifdef HAVE_DIX_CONFIG_H
#include <dix-config.h>
#endif
#ifdef USE_MMX

/*
 * fbHaveMMX(): a real function when neither __amd64__ nor __x86_64__ is
 * defined; otherwise it expands to the constant TRUE.
 */
#if defined(__amd64__) || defined(__x86_64__)
#define fbHaveMMX() TRUE
#else
Bool fbHaveMMX (void);
#endif

void fbComposeSetupMMX (void);

/*
 * MMX compositing routines.  They all share one parameter list: operator,
 * source / mask / destination pictures, the x/y origin in each of them, and
 * the width and height of the area.
 */
void fbCompositeSolidMask_nx8888x0565Cmmx (CARD8 op,
                                           PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                           INT16 xSrc, INT16 ySrc,
                                           INT16 xMask, INT16 yMask,
                                           INT16 xDst, INT16 yDst,
                                           CARD16 width, CARD16 height);

void fbCompositeSrcAdd_8888x8888mmx (CARD8 op,
                                     PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                     INT16 xSrc, INT16 ySrc,
                                     INT16 xMask, INT16 yMask,
                                     INT16 xDst, INT16 yDst,
                                     CARD16 width, CARD16 height);

void fbCompositeSrc_8888x8888mmx (CARD8 op,
                                  PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                  INT16 xSrc, INT16 ySrc,
                                  INT16 xMask, INT16 yMask,
                                  INT16 xDst, INT16 yDst,
                                  CARD16 width, CARD16 height);

void fbCompositeSolidMask_nx8888x8888Cmmx (CARD8 op,
                                           PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                           INT16 xSrc, INT16 ySrc,
                                           INT16 xMask, INT16 yMask,
                                           INT16 xDst, INT16 yDst,
                                           CARD16 width, CARD16 height);

void fbCompositeSolidMask_nx8x8888mmx (CARD8 op,
                                       PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                       INT16 xSrc, INT16 ySrc,
                                       INT16 xMask, INT16 yMask,
                                       INT16 xDst, INT16 yDst,
                                       CARD16 width, CARD16 height);

void fbCompositeSrcAdd_8000x8000mmx (CARD8 op,
                                     PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                     INT16 xSrc, INT16 ySrc,
                                     INT16 xMask, INT16 yMask,
                                     INT16 xDst, INT16 yDst,
                                     CARD16 width, CARD16 height);

void fbCompositeSrc_8888RevNPx8888mmx (CARD8 op,
                                       PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                       INT16 xSrc, INT16 ySrc,
                                       INT16 xMask, INT16 yMask,
                                       INT16 xDst, INT16 yDst,
                                       CARD16 width, CARD16 height);

void fbCompositeSrc_8888RevNPx0565mmx (CARD8 op,
                                       PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                       INT16 xSrc, INT16 ySrc,
                                       INT16 xMask, INT16 yMask,
                                       INT16 xDst, INT16 yDst,
                                       CARD16 width, CARD16 height);

void fbCompositeSolid_nx8888mmx (CARD8 op,
                                 PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                 INT16 xSrc, INT16 ySrc,
                                 INT16 xMask, INT16 yMask,
                                 INT16 xDst, INT16 yDst,
                                 CARD16 width, CARD16 height);

void fbCompositeSolid_nx0565mmx (CARD8 op,
                                 PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                 INT16 xSrc, INT16 ySrc,
                                 INT16 xMask, INT16 yMask,
                                 INT16 xDst, INT16 yDst,
                                 CARD16 width, CARD16 height);

void fbCompositeSolidMask_nx8x0565mmx (CARD8 op,
                                       PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                       INT16 xSrc, INT16 ySrc,
                                       INT16 xMask, INT16 yMask,
                                       INT16 xDst, INT16 yDst,
                                       CARD16 width, CARD16 height);

void fbCompositeSrc_8888x8x8888mmx (CARD8 op,
                                    PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                                    INT16 xSrc, INT16 ySrc,
                                    INT16 xMask, INT16 yMask,
                                    INT16 xDst, INT16 yDst,
                                    CARD16 width, CARD16 height);

void fbCompositeCopyAreammx (CARD8 op,
                             PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst,
                             INT16 xSrc, INT16 ySrc,
                             INT16 xMask, INT16 yMask,
                             INT16 xDst, INT16 yDst,
                             CARD16 width, CARD16 height);

/* Drawable-level helpers; both return a Bool. */
Bool fbCopyAreammx (DrawablePtr pSrc, DrawablePtr pDst,
                    int src_x, int src_y,
                    int dst_x, int dst_y,
                    int width, int height);

Bool fbSolidFillmmx (DrawablePtr pDraw,
                     int x, int y,
                     int width, int height,
                     FbBits xor);

#else /* !USE_MMX */

#define fbHaveMMX() FALSE

#endif /* USE_MMX */
nx-X11/programs/Xserver/fb/fbpict.c
View file @
53d304ef
...
...
@@ -34,7 +34,6 @@
#include "picturestr.h"
#include "mipict.h"
#include "fbpict.h"
#include "fbmmx.h"
typedef
void
(
*
CompositeFunc
)
(
CARD8
op
,
PicturePtr
pSrc
,
...
...
@@ -850,14 +849,6 @@ fbComposite (CARD8 op,
int
x_msk
,
y_msk
,
x_src
,
y_src
,
x_dst
,
y_dst
;
int
w
,
h
,
w_this
,
h_this
;
#ifdef USE_MMX
static
Bool
mmx_setup
=
FALSE
;
if
(
!
mmx_setup
)
{
fbComposeSetupMMX
();
mmx_setup
=
TRUE
;
}
#endif
xDst
+=
pDst
->
pDrawable
->
x
;
yDst
+=
pDst
->
pDrawable
->
y
;
if
(
pSrc
->
pDrawable
)
{
...
...
@@ -879,13 +870,6 @@ fbComposite (CARD8 op,
&&
(
!
pMask
||
pMask
->
filter
!=
PictFilterConvolution
))
switch
(
op
)
{
case
PictOpSrc
:
#ifdef USE_MMX
if
(
!
pMask
&&
pSrc
->
format
==
pDst
->
format
&&
pSrc
->
format
!=
PICT_a8
&&
pSrc
->
pDrawable
!=
pDst
->
pDrawable
)
{
func
=
fbCompositeCopyAreammx
;
}
#endif
break
;
case
PictOpOver
:
if
(
pMask
)
...
...
@@ -901,11 +885,6 @@ fbComposite (CARD8 op,
switch
(
pDst
->
format
)
{
case
PICT_r5g6b5
:
case
PICT_b5g6r5
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSolidMask_nx8x0565mmx
;
else
#endif
func
=
fbCompositeSolidMask_nx8x0565
;
break
;
case
PICT_r8g8b8
:
...
...
@@ -916,11 +895,6 @@ fbComposite (CARD8 op,
case
PICT_x8r8g8b8
:
case
PICT_a8b8g8r8
:
case
PICT_x8b8g8r8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSolidMask_nx8x8888mmx
;
else
#endif
func
=
fbCompositeSolidMask_nx8x8888
;
break
;
}
...
...
@@ -930,19 +904,9 @@ fbComposite (CARD8 op,
switch
(
pDst
->
format
)
{
case
PICT_a8r8g8b8
:
case
PICT_x8r8g8b8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSolidMask_nx8888x8888Cmmx
;
else
#endif
func
=
fbCompositeSolidMask_nx8888x8888C
;
break
;
case
PICT_r5g6b5
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSolidMask_nx8888x0565Cmmx
;
else
#endif
func
=
fbCompositeSolidMask_nx8888x0565C
;
break
;
}
...
...
@@ -953,19 +917,9 @@ fbComposite (CARD8 op,
switch
(
pDst
->
format
)
{
case
PICT_a8b8g8r8
:
case
PICT_x8b8g8r8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSolidMask_nx8888x8888Cmmx
;
else
#endif
func
=
fbCompositeSolidMask_nx8888x8888C
;
break
;
case
PICT_b5g6r5
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSolidMask_nx8888x0565Cmmx
;
else
#endif
func
=
fbCompositeSolidMask_nx8888x0565C
;
break
;
}
...
...
@@ -1003,16 +957,8 @@ fbComposite (CARD8 op,
switch
(
pDst
->
format
)
{
case
PICT_a8r8g8b8
:
case
PICT_x8r8g8b8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSrc_8888RevNPx8888mmx
;
#endif
break
;
case
PICT_r5g6b5
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSrc_8888RevNPx0565mmx
;
#endif
break
;
}
break
;
...
...
@@ -1025,16 +971,8 @@ fbComposite (CARD8 op,
switch
(
pDst
->
format
)
{
case
PICT_a8b8g8r8
:
case
PICT_x8b8g8r8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSrc_8888RevNPx8888mmx
;
#endif
break
;
case
PICT_r5g6b5
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSrc_8888RevNPx0565mmx
;
#endif
break
;
}
break
;
...
...
@@ -1054,10 +992,6 @@ fbComposite (CARD8 op,
pDst
->
format
==
PICT_x8r8g8b8
&&
pMask
->
format
==
PICT_a8
)
{
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSrc_8888x8x8888mmx
;
#endif
}
}
}
...
...
@@ -1075,22 +1009,8 @@ fbComposite (CARD8 op,
switch
(
pDst
->
format
)
{
case
PICT_a8r8g8b8
:
case
PICT_x8r8g8b8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
{
srcRepeat
=
FALSE
;
func
=
fbCompositeSolid_nx8888mmx
;
}
#endif
break
;
case
PICT_r5g6b5
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
{
srcRepeat
=
FALSE
;
func
=
fbCompositeSolid_nx0565mmx
;
}
#endif
break
;
}
break
;
...
...
@@ -1103,11 +1023,6 @@ fbComposite (CARD8 op,
switch
(
pDst
->
format
)
{
case
PICT_a8r8g8b8
:
case
PICT_x8r8g8b8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSrc_8888x8888mmx
;
else
#endif
func
=
fbCompositeSrc_8888x8888
;
break
;
case
PICT_r8g8b8
:
...
...
@@ -1122,20 +1037,12 @@ fbComposite (CARD8 op,
switch
(
pDst
->
format
)
{
case
PICT_a8r8g8b8
:
case
PICT_x8r8g8b8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeCopyAreammx
;
#endif
break
;
}
case
PICT_x8b8g8r8
:
switch
(
pDst
->
format
)
{
case
PICT_a8b8g8r8
:
case
PICT_x8b8g8r8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeCopyAreammx
;
#endif
break
;
}
break
;
...
...
@@ -1143,11 +1050,6 @@ fbComposite (CARD8 op,
switch
(
pDst
->
format
)
{
case
PICT_a8b8g8r8
:
case
PICT_x8b8g8r8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSrc_8888x8888mmx
;
else
#endif
func
=
fbCompositeSrc_8888x8888
;
break
;
case
PICT_b8g8r8
:
...
...
@@ -1183,11 +1085,6 @@ fbComposite (CARD8 op,
case
PICT_a8r8g8b8
:
switch
(
pDst
->
format
)
{
case
PICT_a8r8g8b8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSrcAdd_8888x8888mmx
;
else
#endif
func
=
fbCompositeSrcAdd_8888x8888
;
break
;
}
...
...
@@ -1195,11 +1092,6 @@ fbComposite (CARD8 op,
case
PICT_a8b8g8r8
:
switch
(
pDst
->
format
)
{
case
PICT_a8b8g8r8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSrcAdd_8888x8888mmx
;
else
#endif
func
=
fbCompositeSrcAdd_8888x8888
;
break
;
}
...
...
@@ -1207,11 +1099,6 @@ fbComposite (CARD8 op,
case
PICT_a8
:
switch
(
pDst
->
format
)
{
case
PICT_a8
:
#ifdef USE_MMX
if
(
fbHaveMMX
())
func
=
fbCompositeSrcAdd_8000x8000mmx
;
else
#endif
func
=
fbCompositeSrcAdd_8000x8000
;
break
;
}
...
...
@@ -1336,121 +1223,3 @@ fbPictureInit (ScreenPtr pScreen, PictFormatPtr formats, int nformats)
return
TRUE
;
}
#ifdef USE_MMX
/* The CPU detection code must live in a file that is not compiled with
 * "-mmmx -msse": otherwise gcc may emit CMOV instructions here, and
 * executing them raises SIGILL on old CPUs that do not support CMOV.
 */
#if !defined(__amd64__) && !defined(__x86_64__)
/*
 * Bit flags returned by detectCPUFeatures().
 * SSE deliberately includes the MMX_Extensions bit (0x4 | 0x2 == 0x6).
 */
enum CPUFeatures {
    NoFeatures     = 0,
    MMX            = 1 << 0,
    MMX_Extensions = 1 << 1,
    SSE            = (1 << 2) | MMX_Extensions,
    SSE2           = 1 << 3,
    CMOV           = 1 << 4
};
static
unsigned
int
detectCPUFeatures
(
void
)
{
unsigned
int
result
;
char
vendor
[
13
];
vendor
[
0
]
=
0
;
vendor
[
12
]
=
0
;
/* see p. 118 of amd64 instruction set manual Vol3 */
/* We need to be careful about the handling of %ebx and
* %esp here. We can't declare either one as clobbered
* since they are special registers (%ebx is the "PIC
* register" holding an offset to global data, %esp the
* stack pointer), so we need to make sure they have their+ * original values when we access the output operands.
*/
__asm__
(
"pushf
\n
"
"pop %%eax
\n
"
"mov %%eax, %%ecx
\n
"
"xor $0x00200000, %%eax
\n
"
"push %%eax
\n
"
"popf
\n
"
"pushf
\n
"
"pop %%eax
\n
"
"mov $0x0, %%edx
\n
"
"xor %%ecx, %%eax
\n
"
"jz 1
\n
"
"mov $0x00000000, %%eax
\n
"
"push %%ebx
\n
"
"cpuid
\n
"
"mov %%ebx, %%eax
\n
"
"pop %%ebx
\n
"
"mov %%eax, %1
\n
"
"mov %%edx, %2
\n
"
"mov %%ecx, %3
\n
"
"mov $0x00000001, %%eax
\n
"
"push %%ebx
\n
"
"cpuid
\n
"
"pop %%ebx
\n
"
"1:
\n
"
"mov %%edx, %0
\n
"
:
"=r"
(
result
),
"=m"
(
vendor
[
0
]),
"=m"
(
vendor
[
4
]),
"=m"
(
vendor
[
8
])
:
:
"%eax"
,
"%ecx"
,
"%edx"
);
unsigned
int
features
=
0
;
if
(
result
)
{
/* result now contains the standard feature bits */
if
(
result
&
(
1
<<
15
))
features
|=
CMOV
;
if
(
result
&
(
1
<<
23
))
features
|=
MMX
;
if
(
result
&
(
1
<<
25
))
features
|=
SSE
;
if
(
result
&
(
1
<<
26
))
features
|=
SSE2
;
if
((
result
&
MMX
)
&&
!
(
result
&
SSE
)
&&
(
strcmp
(
vendor
,
"AuthenticAMD"
)
==
0
))
{
/* check for AMD MMX extensions */
unsigned
int
result
;
__asm__
(
"push %%ebx
\n
"
"mov $0x80000000, %%eax
\n
"
"cpuid
\n
"
"xor %%edx, %%edx
\n
"
"cmp $0x1, %%eax
\n
"
"jge 2
\n
"
"mov $0x80000001, %%eax
\n
"
"cpuid
\n
"
"2:
\n
"
"pop %%ebx
\n
"
"mov %%edx, %0
\n
"
:
"=r"
(
result
)
:
:
"%eax"
,
"%ecx"
,
"%edx"
);
if
(
result
&
(
1
<<
22
))
features
|=
MMX_Extensions
;
}
}
return
features
;
}
/*
 * Report whether the MMX code paths may be used on this processor.
 *
 * The answer is computed on the first call from detectCPUFeatures() and
 * cached in function-local statics; it is TRUE only when both the MMX and
 * the MMX_Extensions flags are present.
 */
Bool
fbHaveMMX (void)
{
    static Bool initialized = FALSE;
    static Bool mmx_present;

    if (initialized)
        return mmx_present;

    {
        const unsigned int wanted = MMX | MMX_Extensions;
        unsigned int features = detectCPUFeatures ();

        mmx_present = (features & wanted) == wanted;
        initialized = TRUE;
    }

    return mmx_present;
}
#endif
/* __amd64__ */
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment