291 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			291 lines
		
	
	
		
			7.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*	mmx.c
 | |
| 
 | |
| 	MultiMedia eXtensions GCC interface library for IA32.
 | |
| 
 | |
| 	To use this library, simply include this header file
 | |
| 	and compile with GCC.  You MUST have inlining enabled
 | |
| 	in order for mmx_ok() to work; this can be done by
 | |
| 	simply using -O on the GCC command line.
 | |
| 
 | |
| 	Compiling with -DMMX_TRACE will cause detailed trace
 | |
| 	output to be sent to stderr for each mmx operation.
 | |
| 	This adds lots of code, and obviously slows execution to
 | |
| 	a crawl, but can be very useful for debugging.
 | |
| 
 | |
| 	THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
 | |
| 	EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
 | |
| 	LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 | |
| 	AND FITNESS FOR ANY PARTICULAR PURPOSE.
 | |
| 
 | |
| 	1997-99 by H. Dietz and R. Fisher
 | |
| 
 | |
|  Notes:
 | |
| 	It appears that the latest gas has the pand problem fixed, therefore
 | |
| 	  I'll undefine BROKEN_PAND by default.
 | |
| */
 | |
| 
 | |
| #ifdef HAVE_CONFIG_H
 | |
| #include "config.h"
 | |
| #endif
 | |
| 
 | |
| #include "goom_config.h"
 | |
| 
 | |
| #ifdef HAVE_MMX
 | |
| 
 | |
| #define BUFFPOINTNB 16
 | |
| #define BUFFPOINTMASK 0xffff
 | |
| #define BUFFINCR 0xff
 | |
| 
 | |
| #include "mmx.h"
 | |
| #include "goom_graphic.h"
 | |
| 
 | |
| #define sqrtperte 16
 | |
| // compute a % sqrtperte as a & PERTEMASK
 | |
| #define PERTEMASK 0xf
 | |
| // compute a / sqrtperte as a >> PERTEDEC
 | |
| #define PERTEDEC 4
 | |
| 
 | |
/*
 * Report whether the MMX code paths in this file may be used.
 *
 * Returns bit 0 of the value reported by mm_support(); presumably that bit
 * is the plain-MMX capability flag (confirm against mmx.h).
 */
int
mmx_supported (void)
{
  const int cpu_caps = mm_support ();

  return cpu_caps & 0x1;
}
 | |
| 
 | |
| void
 | |
| zoom_filter_mmx (int prevX, int prevY,
 | |
|     Pixel * expix1, Pixel * expix2,
 | |
|     int *brutS, int *brutD, int buffratio, int precalCoef[16][16])
 | |
| {
 | |
|   unsigned int ax = (prevX - 1) << PERTEDEC, ay = (prevY - 1) << PERTEDEC;
 | |
| 
 | |
|   int bufsize = prevX * prevY;
 | |
|   int loop;
 | |
| 
 | |
|   __asm__ __volatile__ ("pxor %mm7,%mm7");
 | |
| 
 | |
|   for (loop = 0; loop < bufsize; loop++) {
 | |
|     /*      int couleur; */
 | |
|     int px, py;
 | |
|     int pos;
 | |
|     int coeffs;
 | |
| 
 | |
|     int myPos = loop << 1, myPos2 = myPos + 1;
 | |
|     int brutSmypos = brutS[myPos];
 | |
| 
 | |
|     px = brutSmypos + (((brutD[myPos] -
 | |
|                 brutSmypos) * buffratio) >> BUFFPOINTNB);
 | |
|     brutSmypos = brutS[myPos2];
 | |
|     py = brutSmypos + (((brutD[myPos2] -
 | |
|                 brutSmypos) * buffratio) >> BUFFPOINTNB);
 | |
| 
 | |
|     if ((py >= ay) || (px >= ax)) {
 | |
|       pos = coeffs = 0;
 | |
|     } else {
 | |
|       pos = ((px >> PERTEDEC) + prevX * (py >> PERTEDEC));
 | |
|       // coef en modulo 15
 | |
|       coeffs = precalCoef[px & PERTEMASK][py & PERTEMASK];
 | |
|     }
 | |
| 
 | |
|     __asm__ __volatile__ ("movd %2, %%mm6 \n\t"
 | |
|         /* recuperation des deux premiers pixels dans mm0 et mm1 */
 | |
|         "movq (%3,%1,4), %%mm0 \n\t"    /* b1-v1-r1-a1-b2-v2-r2-a2 */
 | |
|         "movq %%mm0, %%mm1 \n\t"        /* b1-v1-r1-a1-b2-v2-r2-a2 */
 | |
|         /* depackage du premier pixel */
 | |
|         "punpcklbw %%mm7, %%mm0 \n\t"   /* 00-b2-00-v2-00-r2-00-a2 */
 | |
|         "movq %%mm6, %%mm5 \n\t"        /* ??-??-??-??-c4-c3-c2-c1 */
 | |
|         /* depackage du 2ieme pixel */
 | |
|         "punpckhbw %%mm7, %%mm1 \n\t"   /* 00-b1-00-v1-00-r1-00-a1 */
 | |
|         /* extraction des coefficients... */
 | |
|         "punpcklbw %%mm5, %%mm6 \n\t"   /* c4-c4-c3-c3-c2-c2-c1-c1 */
 | |
|         "movq %%mm6, %%mm4 \n\t"        /* c4-c4-c3-c3-c2-c2-c1-c1 */
 | |
|         "movq %%mm6, %%mm5 \n\t"        /* c4-c4-c3-c3-c2-c2-c1-c1 */
 | |
|         "punpcklbw %%mm5, %%mm6 \n\t"   /* c2-c2-c2-c2-c1-c1-c1-c1 */
 | |
|         "punpckhbw %%mm5, %%mm4 \n\t"   /* c4-c4-c4-c4-c3-c3-c3-c3 */
 | |
|         "movq %%mm6, %%mm3 \n\t"        /* c2-c2-c2-c2-c1-c1-c1-c1 */
 | |
|         "punpcklbw %%mm7, %%mm6 \n\t"   /* 00-c1-00-c1-00-c1-00-c1 */
 | |
|         "punpckhbw %%mm7, %%mm3 \n\t"   /* 00-c2-00-c2-00-c2-00-c2 */
 | |
|         /* multiplication des pixels par les coefficients */
 | |
|         "pmullw %%mm6, %%mm0 \n\t"      /* c1*b2-c1*v2-c1*r2-c1*a2 */
 | |
|         "pmullw %%mm3, %%mm1 \n\t"      /* c2*b1-c2*v1-c2*r1-c2*a1 */
 | |
|         "paddw %%mm1, %%mm0 \n\t"
 | |
|         /* ...extraction des 2 derniers coefficients */
 | |
|         "movq %%mm4, %%mm5 \n\t"        /* c4-c4-c4-c4-c3-c3-c3-c3 */
 | |
|         "punpcklbw %%mm7, %%mm4 \n\t"   /* 00-c3-00-c3-00-c3-00-c3 */
 | |
|         "punpckhbw %%mm7, %%mm5 \n\t"   /* 00-c4-00-c4-00-c4-00-c4 */
 | |
|         /* ajouter la longueur de ligne a esi */
 | |
|         "addl 8(%%ebp),%1 \n\t"
 | |
|         /* recuperation des 2 derniers pixels */
 | |
|         "movq (%3,%1,4), %%mm1 \n\t" "movq %%mm1, %%mm2 \n\t"
 | |
|         /* depackage des pixels */
 | |
|         "punpcklbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm2 \n\t"
 | |
|         /* multiplication pas les coeffs */
 | |
|         "pmullw %%mm4, %%mm1 \n\t" "pmullw %%mm5, %%mm2 \n\t"
 | |
|         /* ajout des valeurs obtenues ? la valeur finale */
 | |
|         "paddw %%mm1, %%mm0 \n\t" "paddw %%mm2, %%mm0 \n\t"
 | |
|         /* division par 256 = 16+16+16+16, puis repackage du pixel final */
 | |
|         "psrlw $8, %%mm0 \n\t"
 | |
|         "packuswb %%mm7, %%mm0 \n\t" "movd %%mm0,%0 \n\t":"=g" (expix2[loop])
 | |
|         :"r" (pos), "r" (coeffs), "r" (expix1)
 | |
|         );
 | |
| 
 | |
|     emms ();
 | |
|   }
 | |
| }
 | |
| 
 | |
/*
 * Additive plot using the mmx.h instruction wrappers: load _backbuf into
 * mm0, add _col to it with paddusb (per-byte unsigned saturating add) and
 * store the result to _out.
 *
 * Wrapped in do { } while (0) so that the expansion followed by ';' is
 * exactly one statement, including inside an unbraced if/else.
 */
#define DRAWMETHOD_PLUS_MMX(_out,_backbuf,_col) \
do { \
	movd_m2r(_backbuf, mm0); \
	paddusb_m2r(_col, mm0); \
	movd_r2m(mm0, _out); \
} while (0)

/* Plot onto the pixel addressed by the caller's local `p`, adding the
 * caller's local `col`. */
#define DRAWMETHOD DRAWMETHOD_PLUS_MMX(*p,*p,col)
 | |
| 
 | |
| void
 | |
| draw_line_mmx (Pixel * data, int x1, int y1, int x2, int y2, int col,
 | |
|     int screenx, int screeny)
 | |
| {
 | |
|   int x, y, dx, dy, yy, xx;
 | |
|   Pixel *p;
 | |
| 
 | |
|   if ((y1 < 0) || (y2 < 0) || (x1 < 0) || (x2 < 0) || (y1 >= screeny)
 | |
|       || (y2 >= screeny) || (x1 >= screenx) || (x2 >= screenx))
 | |
|     goto end_of_line;
 | |
| 
 | |
|   dx = x2 - x1;
 | |
|   dy = y2 - y1;
 | |
|   if (x1 >= x2) {
 | |
|     int tmp;
 | |
| 
 | |
|     tmp = x1;
 | |
|     x1 = x2;
 | |
|     x2 = tmp;
 | |
|     tmp = y1;
 | |
|     y1 = y2;
 | |
|     y2 = tmp;
 | |
|     dx = x2 - x1;
 | |
|     dy = y2 - y1;
 | |
|   }
 | |
| 
 | |
|   /* vertical line */
 | |
|   if (dx == 0) {
 | |
|     if (y1 < y2) {
 | |
|       p = &(data[(screenx * y1) + x1]);
 | |
|       for (y = y1; y <= y2; y++) {
 | |
|         DRAWMETHOD;
 | |
|         p += screenx;
 | |
|       }
 | |
|     } else {
 | |
|       p = &(data[(screenx * y2) + x1]);
 | |
|       for (y = y2; y <= y1; y++) {
 | |
|         DRAWMETHOD;
 | |
|         p += screenx;
 | |
|       }
 | |
|     }
 | |
|     goto end_of_line;
 | |
|   }
 | |
|   /* horizontal line */
 | |
|   if (dy == 0) {
 | |
|     if (x1 < x2) {
 | |
|       p = &(data[(screenx * y1) + x1]);
 | |
|       for (x = x1; x <= x2; x++) {
 | |
|         DRAWMETHOD;
 | |
|         p++;
 | |
|       }
 | |
|       goto end_of_line;
 | |
|     } else {
 | |
|       p = &(data[(screenx * y1) + x2]);
 | |
|       for (x = x2; x <= x1; x++) {
 | |
|         DRAWMETHOD;
 | |
|         p++;
 | |
|       }
 | |
|       goto end_of_line;
 | |
|     }
 | |
|   }
 | |
|   /* 1    */
 | |
|   /*  \   */
 | |
|   /*   \  */
 | |
|   /*    2 */
 | |
|   if (y2 > y1) {
 | |
|     /* steep */
 | |
|     if (dy > dx) {
 | |
|       dx = ((dx << 16) / dy);
 | |
|       x = x1 << 16;
 | |
|       for (y = y1; y <= y2; y++) {
 | |
|         xx = x >> 16;
 | |
|         p = &(data[(screenx * y) + xx]);
 | |
|         DRAWMETHOD;
 | |
|         if (xx < (screenx - 1)) {
 | |
|           p++;
 | |
|           /* DRAWMETHOD; */
 | |
|         }
 | |
|         x += dx;
 | |
|       }
 | |
|       goto end_of_line;
 | |
|     }
 | |
|     /* shallow */
 | |
|     else {
 | |
|       dy = ((dy << 16) / dx);
 | |
|       y = y1 << 16;
 | |
|       for (x = x1; x <= x2; x++) {
 | |
|         yy = y >> 16;
 | |
|         p = &(data[(screenx * yy) + x]);
 | |
|         DRAWMETHOD;
 | |
|         if (yy < (screeny - 1)) {
 | |
|           p += screeny;
 | |
|           /* DRAWMETHOD; */
 | |
|         }
 | |
|         y += dy;
 | |
|       }
 | |
|     }
 | |
|   }
 | |
|   /*    2 */
 | |
|   /*   /  */
 | |
|   /*  /   */
 | |
|   /* 1    */
 | |
|   else {
 | |
|     /* steep */
 | |
|     if (-dy > dx) {
 | |
|       dx = ((dx << 16) / -dy);
 | |
|       x = (x1 + 1) << 16;
 | |
|       for (y = y1; y >= y2; y--) {
 | |
|         xx = x >> 16;
 | |
|         p = &(data[(screenx * y) + xx]);
 | |
|         DRAWMETHOD;
 | |
|         if (xx < (screenx - 1)) {
 | |
|           p--;
 | |
|           /* DRAWMETHOD; */
 | |
|         }
 | |
|         x += dx;
 | |
|       }
 | |
|       goto end_of_line;
 | |
|     }
 | |
|     /* shallow */
 | |
|     else {
 | |
|       dy = ((dy << 16) / dx);
 | |
|       y = y1 << 16;
 | |
|       for (x = x1; x <= x2; x++) {
 | |
|         yy = y >> 16;
 | |
|         p = &(data[(screenx * yy) + x]);
 | |
|         DRAWMETHOD;
 | |
|         if (yy < (screeny - 1)) {
 | |
|           p += screeny;
 | |
|           /* DRAWMETHOD; */
 | |
|         }
 | |
|         y += dy;
 | |
|       }
 | |
|       goto end_of_line;
 | |
|     }
 | |
|   }
 | |
| end_of_line:
 | |
|   emms ();
 | |
|   /* __asm__ __volatile__ ("emms"); */
 | |
| }
 | |
| #else
 | |
/*
 * Fallback used when the file is built without HAVE_MMX: always reports
 * that MMX is not available.
 */
int
mmx_supported (void)
{
  return 0;
}
 | |
| #endif /* HAVE_MMX */
 |